summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-01-31 03:07:46 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-01-31 03:07:46 +0000
commitf84e5d9a2788d04b3f64e13fdb38c4172017a9c1 (patch)
treebf4c117dcf7fdc1c9aa00bceaaf14e046594d29e /llvm/test/CodeGen/AMDGPU
parentbc332648e8e3ce090c4ace3240809c349673d1a3 (diff)
downloadbcm5719-llvm-f84e5d9a2788d04b3f64e13fdb38c4172017a9c1.tar.gz
bcm5719-llvm-f84e5d9a2788d04b3f64e13fdb38c4172017a9c1.zip
AMDGPU: Generalize matching of v_med3_f32
I think this is safe as long as all inputs are known to never be NaNs. Also add an intrinsic for fmed3 to be able to handle all safe math cases. llvm-svn: 293598
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmed3.ll738
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll28
2 files changed, 760 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 44889c9c472..0213acdc18e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -1,12 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare double @llvm.minnum.f64(double, double) #0
-declare double @llvm.maxnum.f64(double, double) #0
-
; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
@@ -165,6 +159,738 @@ define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrs
ret void
}
+; Median-of-three idiom max(min(-a, b), min(max(-a, b), c)) with the first
+; input negated through `fsub -0.0, %a`. The check expects a single
+; v_med3_f32 whose first source carries the negation as a source modifier.
+; NOTE(review): attribute group #2 is defined outside this excerpt;
+; presumably it enables no-NaNs FP math -- confirm.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %a.fneg = fsub float -0.0, %a
+ %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three idiom max(min(a, -b), min(max(a, -b), c)) with the second
+; input negated through `fsub -0.0, %b`. The check expects a single
+; v_med3_f32 whose second source carries the negation as a source modifier.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %b.fneg = fsub float -0.0, %b
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three idiom max(min(a, b), min(max(a, b), -c)) with the third
+; input negated through `fsub -0.0, %c`. The check expects a single
+; v_med3_f32 whose third source carries the negation as a source modifier.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
+define void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %c.fneg = fsub float -0.0, %c
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three idiom with a different modifier on every input: a is
+; negated, b goes through fabs, and c goes through fabs followed by a
+; negation. The check expects one v_med3_f32 with sources -A, |B|, -|C|.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
+define void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ ; Build the three modified operands: -a, |b| and -|c|.
+ %a.fneg = fsub float -0.0, %a
+ %b.fabs = call float @llvm.fabs.f32(float %b)
+ %c.fabs = call float @llvm.fabs.f32(float %c)
+ %c.fabs.fneg = fsub float -0.0, %c.fabs
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three idiom where every input is passed through fabs and then
+; negated. The check expects one v_med3_f32 with sources -|A|, -|B|, -|C|.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
+define void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ ; Each operand becomes -|x|: fabs first, then negate.
+ %a.fabs = call float @llvm.fabs.f32(float %a)
+ %a.fabs.fneg = fsub float -0.0, %a.fabs
+ %b.fabs = call float @llvm.fabs.f32(float %b)
+ %b.fabs.fneg = fsub float -0.0, %b.fabs
+ %c.fabs = call float @llvm.fabs.f32(float %c)
+ %c.fabs.fneg = fsub float -0.0, %c.fabs
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three idiom whose three inputs are each produced by an
+; `fadd nnan`, so the no-NaN guarantee comes from the instruction flags on
+; the operands. The checks expect the three adds (in any order) followed by
+; one v_med3_f32 over their results.
+; NOTE(review): this function uses attribute group #1, which is defined
+; outside this excerpt -- confirm what it enables.
+; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
+; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
+; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
+define void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ ; Distinct constants (1.0, 2.0, 4.0) let the checks tell the adds apart.
+ %a.nnan = fadd nnan float %a, 1.0
+ %b.nnan = fadd nnan float %b, 2.0
+ %c.nnan = fadd nnan float %c, 4.0
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; 16 combinations
+
+; 0: max(min(x, y), min(max(x, y), z))
+; 1: max(min(x, y), min(max(y, x), z))
+; 2: max(min(x, y), min(z, max(x, y)))
+; 3: max(min(x, y), min(z, max(y, x)))
+; 4: max(min(y, x), min(max(x, y), z))
+; 5: max(min(y, x), min(max(y, x), z))
+; 6: max(min(y, x), min(z, max(x, y)))
+; 7: max(min(y, x), min(z, max(y, x)))
+;
+; + commute outermost max
+
+; Combination 0 of the table above: max(min(a, b), min(max(a, b), c)).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 1 of the table above: max(min(a, b), min(max(b, a), c)).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 2 of the table above: max(min(a, b), min(c, max(a, b))).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 3 of the table above: max(min(a, b), min(c, max(b, a))).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 4 of the table above: max(min(b, a), min(max(a, b), c)).
+; The inner min is commuted while the inner max keeps the (a, b) order and
+; sits on the left of the clamping min; this is the one shape among
+; combinations 4-7 that the other three tests do not exercise.
+; NOTE(review): the expected source order B, A, C is kept because the sibling
+; tests for combinations 5-7 show the order mirroring the inner min's
+; operands -- re-run llc to confirm.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 5 of the table above: max(min(b, a), min(max(b, a), c)).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 6 of the table above: max(min(b, a), min(c, max(a, b))).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 7 of the table above: max(min(b, a), min(c, max(b, a))).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 0 with the outermost max commuted:
+; max(min(max(a, b), c), min(a, b)).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 1 with the outermost max commuted:
+; max(min(max(b, a), c), min(a, b)).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 2 with the outermost max commuted:
+; max(min(c, max(a, b)), min(a, b)).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 3 with the outermost max commuted:
+; max(min(c, max(b, a)), min(a, b)).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 4 with the outermost max commuted:
+; max(min(max(a, b), c), min(b, a)).
+; The inner min is commuted, the inner max keeps the (a, b) order and sits on
+; the left of the clamping min; none of the tests for combinations 5-7 with a
+; commuted outer max covers this shape.
+; NOTE(review): the expected source order A, B, C is inferred from the sibling
+; commuted-outer-max tests, where it mirrors the inner max's operand order
+; -- re-run llc to confirm.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 5 with the outermost max commuted:
+; max(min(max(b, a), c), min(b, a)).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 6 with the outermost max commuted:
+; max(min(c, max(a, b)), min(b, a)).
+; Expects one v_med3_f32 with sources in the order A, B, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Combination 7 with the outermost max commuted:
+; max(min(c, max(b, a)), min(b, a)).
+; Expects one v_med3_f32 with sources in the order B, A, C.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
+define void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
+ %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
+ %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
+ %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; ---------------------------------------------------------------------
+; Negative patterns
+; ---------------------------------------------------------------------
+
+; Negative test: the pattern-0 chain where %tmp0 (the inner min) has a second
+; use through a volatile store. The checks expect the
+; v_min_f32 / v_max_f32 / v_min_f32 / v_max_f32 sequence to be emitted.
+; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
+; GCN: v_min_f32
+; GCN: v_max_f32
+; GCN: v_min_f32
+; GCN: v_max_f32
+define void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ ; Extra user of the inner min.
+ store volatile float %tmp0, float addrspace(1)* undef
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Pattern-0 chain where %tmp1 (the inner max) has a second use through a
+; volatile store.
+; NOTE(review): only the function label is checked here; there are no
+; instruction expectations -- confirm whether checks were intended.
+; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
+define void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ ; Extra user of the inner max.
+ store volatile float %tmp1, float addrspace(1)* undef
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; max(min(a, b), min(max(a, b), c)) where the inner min(max(a, b), c) is also
+; stored on its own. The function uses attribute group #1 (NaNs not ruled
+; out) and the inputs are plain loads, so no single med3 may be emitted.
+; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
+; GCN-NOT: v_med3_f32
+define void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ ; Second use of min(max(a, b), c).
+ store volatile float %tmp2, float addrspace(1)* undef
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+
+; Plain max(min(a, b), min(max(a, b), c)) on loaded values, compiled with
+; attribute group #1 (NaNs not ruled out). Without a no-NaN guarantee on the
+; inputs the chain must not be replaced by a single med3.
+; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
+; GCN-NOT: v_med3_f32
+define void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+ %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
+ %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three pattern where only the second and third inputs come from
+; nnan-flagged fadds; the first input's fadd has no flag. With one input not
+; known to be NaN-free, the chain must not be folded into a single med3.
+; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
+; GCN-NOT: v_med3_f32
+define void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ ; Despite its name, %a.nnan is the one value computed without nnan.
+ %a.nnan = fadd float %a, 1.0
+ %b.nnan = fadd nnan float %b, 2.0
+ %c.nnan = fadd nnan float %c, 4.0
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three pattern where only the first and third inputs come from
+; nnan-flagged fadds; the second input's fadd has no flag. With one input not
+; known to be NaN-free, the chain must not be folded into a single med3.
+; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
+; GCN-NOT: v_med3_f32
+define void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ %a.nnan = fadd nnan float %a, 1.0
+ ; Despite its name, %b.nnan is the one value computed without nnan.
+ %b.nnan = fadd float %b, 2.0
+ %c.nnan = fadd nnan float %c, 4.0
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; Median-of-three pattern where only the first and second inputs come from
+; nnan-flagged fadds; the third input's fadd has no flag. With one input not
+; known to be NaN-free, the chain must not be folded into a single med3.
+; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
+; GCN-NOT: v_med3_f32
+define void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load volatile float, float addrspace(1)* %gep0
+ %b = load volatile float, float addrspace(1)* %gep1
+ %c = load volatile float, float addrspace(1)* %gep2
+
+ %a.nnan = fadd nnan float %a, 1.0
+ %b.nnan = fadd nnan float %b, 2.0
+ ; Despite its name, %c.nnan is the one value computed without nnan.
+ %c.nnan = fadd float %c, 4.0
+
+ %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
+ %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
+ %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
+ %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+ store float %med3, float addrspace(1)* %outgep
+ ret void
+}
+
+; The inner min takes -a while the inner max takes a, so the two halves of
+; the median-of-three shape do not share the same first operand. The checks
+; below expect the four separate min/max instructions to remain.
+; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_min_f32
+; GCN: v_max_f32
+; GCN: v_min_f32
+; GCN: v_max_f32
+define void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %lane = call i32 @llvm.amdgcn.workitem.id.x()
+ %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
+ %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
+ %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
+ %res.addr = getelementptr float, float addrspace(1)* %out, i32 %lane
+ %a = load volatile float, float addrspace(1)* %a.addr
+ %b = load volatile float, float addrspace(1)* %b.addr
+ %c = load volatile float, float addrspace(1)* %c.addr
+ ; Negation written as a subtraction from -0.0.
+ %neg.a = fsub float -0.0, %a
+ ; min(-a, b) versus max(a, b): the operands differ by the negation.
+ %min.nab = call float @llvm.minnum.f32(float %neg.a, float %b)
+ %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
+ %min.max.c = call float @llvm.minnum.f32(float %max.ab, float %c)
+ %result = call float @llvm.maxnum.f32(float %min.nab, float %min.max.c)
+ store float %result, float addrspace(1)* %res.addr
+ ret void
+}
+
+; A lone min(max(a, b), c) is only half of the median-of-three shape and is
+; not sufficient to form med3; it must stay a separate max and min.
+; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
+; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]]
+; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]]
+; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]]
+; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]]
+define void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
+ %lane = call i32 @llvm.amdgcn.workitem.id.x()
+ %a.addr = getelementptr float, float addrspace(1)* %aptr, i32 %lane
+ %b.addr = getelementptr float, float addrspace(1)* %bptr, i32 %lane
+ %c.addr = getelementptr float, float addrspace(1)* %cptr, i32 %lane
+ %res.addr = getelementptr float, float addrspace(1)* %out, i32 %lane
+ %a = load volatile float, float addrspace(1)* %a.addr
+ %b = load volatile float, float addrspace(1)* %b.addr
+ %c = load volatile float, float addrspace(1)* %c.addr
+ ; min(max(a, b), c)
+ %max.ab = call float @llvm.maxnum.f32(float %a, float %b)
+ %clamped = call float @llvm.minnum.f32(float %max.ab, float %c)
+ store float %clamped, float addrspace(1)* %res.addr
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare float @llvm.fabs.f32(float) #0
+declare float @llvm.minnum.f32(float, float) #0
+declare float @llvm.maxnum.f32(float, float) #0
+declare double @llvm.minnum.f64(double, double) #0
+declare double @llvm.maxnum.f64(double, double) #0
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
new file mode 100644
index 00000000000..010599d3b29
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Direct call of the fmed3 intrinsic on three float kernel arguments; the
+; check expects a single v_med3_f32.
+; GCN-LABEL: {{^}}test_fmed3:
+; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @test_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
+ store float %med3, float addrspace(1)* %out
+ ret void
+}
+
+; Feeds the fmed3 intrinsic with -src0, |src1| and -|src2|; the check expects
+; the negate and abs to appear as operand modifiers on v_med3_f32.
+; GCN-LABEL: {{^}}test_fmed3_srcmods:
+; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
+define void @test_fmed3_srcmods(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 {
+ ; Negations are written as subtractions from -0.0.
+ %neg.src0 = fsub float -0.0, %src0
+ %abs.src1 = call float @llvm.fabs.f32(float %src1)
+ %abs.src2 = call float @llvm.fabs.f32(float %src2)
+ %neg.abs.src2 = fsub float -0.0, %abs.src2
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %neg.src0, float %abs.src1, float %neg.abs.src2)
+ store float %med3, float addrspace(1)* %out
+ ret void
+}
+
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
+declare float @llvm.fabs.f32(float) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
OpenPOWER on IntegriCloud