diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 21 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/smed3.ll | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/umed3.ll | 21 |
3 files changed, 60 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index cc0b978ea3c..11136e0a1df 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1650,20 +1650,33 @@ class FP16Med3Pat<ValueType vt, (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE) >; -class Int16Med3Pat<Instruction med3Inst, +multiclass Int16Med3Pat<Instruction med3Inst, + SDPatternOperator min, SDPatternOperator max, SDPatternOperator max_oneuse, SDPatternOperator min_oneuse, - ValueType vt = i32> : GCNPat< + ValueType vt = i16> { + // This matches 16 permutations of + // max(min(x, y), min(max(x, y), z)) + def : GCNPat < (max (min_oneuse vt:$src0, vt:$src1), (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) >; + // This matches 16 permutations of + // min(max(a, b), max(min(a, b), c)) + def : GCNPat < + (min (max_oneuse vt:$src0, vt:$src1), + (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), + (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) +>; +} + def : FPMed3Pat<f32, V_MED3_F32>; let OtherPredicates = [isGFX9] in { def : FP16Med3Pat<f16, V_MED3_F16>; -def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>; -def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>; +defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>; +defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>; } // End Predicates = [isGFX9] diff --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll index f2b64f06426..22c3b2d42f3 100644 --- a/llvm/test/CodeGen/AMDGPU/smed3.ll +++ b/llvm/test/CodeGen/AMDGPU/smed3.ll @@ -681,6 +681,28 @@ bb: ret void } +; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1: +; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} + +define amdgpu_kernel void @v_test_smed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 { +bb: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid + %gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3 + %gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8 + %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %x = load i16, i16 addrspace(1)* %gep0 + %y = load i16, i16 addrspace(1)* %gep1 + %z = load i16, i16 addrspace(1)* %gep2 + + %tmp0 = call i16 @smin16(i16 %x, i16 %y) + %tmp1 = call i16 @smax16(i16 %x, i16 %y) + %tmp2 = call i16 @smax16(i16 %tmp0, i16 %z) + %tmp3 = call i16 @smin16(i16 %tmp1, i16 %tmp2) + store i16 %tmp3, i16 addrspace(1)* %out.gep + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } attributes #2 = { nounwind readnone alwaysinline } diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll index 315308e03ec..2f6685921df 100644 --- a/llvm/test/CodeGen/AMDGPU/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/umed3.ll @@ -716,6 +716,27 @@ bb: ret void } +; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1: +; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @v_test_umed3_i16_pat_1(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 { +bb: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid + %gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3 + %gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8 + %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %x = load i16, i16 addrspace(1)* %gep0 + %y = load i16, i16 addrspace(1)* %gep1 + %z = load i16, i16 addrspace(1)* %gep2 + + %tmp0 = call i16 @umin16(i16 %x, i16 %y) + %tmp1 = call i16 @umax16(i16 %x, i16 %y) + %tmp2 = call i16 @umax16(i16 %tmp0, i16 %z) + %tmp3 = call i16 @umin16(i16 %tmp1, i16 %tmp2) + store i16 %tmp3, i16 addrspace(1)* %out.gep + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } attributes #2 = { nounwind readnone alwaysinline } |

