summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Aakanksha Patil <aakanksha555@gmail.com> 2018-11-14 20:10:41 +0000
committer: Aakanksha Patil <aakanksha555@gmail.com> 2018-11-14 20:10:41 +0000
commit: 1a60116b5c18d567912bd7e0e7c18052c1059086 (patch)
tree: 5be357e3a32b8ce8af7ae313c56123aaa1658ac5 /llvm/lib/Target/AMDGPU
parent: 808e157356234ecc865c0baecb2e22df5e4d54a8 (diff)
download: bcm5719-llvm-1a60116b5c18d567912bd7e0e7c18052c1059086.tar.gz
download: bcm5719-llvm-1a60116b5c18d567912bd7e0e7c18052c1059086.zip
AMDGPU: Additional pattern for i16 median3 matching
min(max(a, b), max(min(a, b), c))

Differential Revision: https://reviews.llvm.org/D54494

llvm-svn: 346886
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td | 21
1 files changed, 17 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cc0b978ea3c..11136e0a1df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1650,20 +1650,33 @@ class FP16Med3Pat<ValueType vt,
(med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE)
>;
-class Int16Med3Pat<Instruction med3Inst,
+multiclass Int16Med3Pat<Instruction med3Inst,
+ SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
- ValueType vt = i32> : GCNPat<
+ ValueType vt = i16> {
+ // This matches 16 permutations of
+ // max(min(x, y), min(max(x, y), z))
+ def : GCNPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
+ // This matches 16 permutations of
+ // min(max(a, b), max(min(a, b), c))
+ def : GCNPat <
+ (min (max_oneuse vt:$src0, vt:$src1),
+ (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
+ (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+}
+
def : FPMed3Pat<f32, V_MED3_F32>;
let OtherPredicates = [isGFX9] in {
def : FP16Med3Pat<f16, V_MED3_F16>;
-def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
-def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
+defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9]
OpenPOWER on IntegriCloud