summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>2019-06-06 23:49:01 +0000
committerNemanja Ivanovic <nemanja.i.ibm@gmail.com>2019-06-06 23:49:01 +0000
commitef4a3aa549eadc150d4a31cff655064c3a323d12 (patch)
treef32e37feaee44ccae31c4574f919d5704b7b186c /llvm/lib
parente41e366ae72741261bfe71ec85cce4e2aed29109 (diff)
downloadbcm5719-llvm-ef4a3aa549eadc150d4a31cff655064c3a323d12.tar.gz
bcm5719-llvm-ef4a3aa549eadc150d4a31cff655064c3a323d12.zip
[PowerPC] Exploit the vector min/max instructions
Use the PPC vector min/max instructions for computing the corresponding operation as these should be faster than the compare/select sequences we currently emit. Differential revision: https://reviews.llvm.org/D47332 llvm-svn: 362759
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp18
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td26
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td21
3 files changed, 65 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa5099daf90..ee8fda9c437 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -555,6 +555,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ // For v2i64, these are only valid with P8Vector. This is corrected after
+ // the loop.
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ }
+
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
@@ -638,6 +650,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ if (!Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
+ }
for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
setOperationAction(ISD::ABS, VT, Custom);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index ae362ff3cc6..8176c5120a8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -900,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+// Max/Min
+def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXUB $src1, $src2))>;
+def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXSB $src1, $src2))>;
+def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXUH $src1, $src2))>;
+def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXSH $src1, $src2))>;
+def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXUW $src1, $src2))>;
+def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXSW $src1, $src2))>;
+def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINUB $src1, $src2))>;
+def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINSB $src1, $src2))>;
+def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINUH $src1, $src2))>;
+def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINSH $src1, $src2))>;
+def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINUW $src1, $src2))>;
+def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINSW $src1, $src2))>;
+
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 482e5c532b1..47323b77775 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1188,6 +1188,15 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
(XXSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1522,6 +1531,18 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
(STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+ def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
} // AddedComplexity = 400
} // HasP8Vector
OpenPOWER on IntegriCloud