summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp164
1 files changed, 129 insertions, 35 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dd99b71eda6..619b13fe86e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
- if (Subtarget.hasP9Vector()) {
- setOperationAction(ISD::ABS, MVT::v4i32, Legal);
- setOperationAction(ISD::ABS, MVT::v8i16, Legal);
- setOperationAction(ISD::ABS, MVT::v16i8, Legal);
- }
-
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -556,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Custom);
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -661,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ // Without hasP8Altivec set, v2i64 SMAX isn't available.
+ // But ABS custom lowering requires SMAX support.
+ if (!Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -1083,6 +1083,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::FSQRT);
}
+ if (Subtarget.hasP9Altivec()) {
+ setTargetDAGCombine(ISD::ABS);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -1343,6 +1347,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
+ case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
@@ -9003,35 +9008,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(PPC::R2, MVT::i32);
}
- // We are looking for absolute values here.
- // The idea is to try to fit one of two patterns:
- // max (a, (0-a)) OR max ((0-a), a)
- if (Subtarget.hasP9Vector() &&
- (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsh ||
- IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) {
- SDValue V1 = Op.getOperand(1);
- SDValue V2 = Op.getOperand(2);
- if (V1.getSimpleValueType() == V2.getSimpleValueType() &&
- (V1.getSimpleValueType() == MVT::v4i32 ||
- V1.getSimpleValueType() == MVT::v8i16 ||
- V1.getSimpleValueType() == MVT::v16i8)) {
- if ( V1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
- V1.getOperand(1) == V2 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2);
- }
-
- if ( V2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
- V2.getOperand(1) == V1 ) {
- // Generate the abs instruction with the operands
- return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1);
- }
- }
- }
-
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
@@ -9572,6 +9548,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
}
}
+SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ // Custom lowering for vector ISD::ABS: emits a signed max of x and (0 - x).
+ assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
+
+ EVT VT = Op.getValueType();
+ assert(VT.isVector() &&
+ "Only set vector abs as custom, scalar abs shouldn't reach here!");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ VT == MVT::v16i8) &&
+ "Unexpected vector element type!");
+ assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
+ "Current subtarget doesn't support smax v2i64!");
+
+ // Vector abs is lowered as a signed max of the input and its negation:
+ // abs x
+ // ==>
+ // y = 0 - x
+ // smax(x, y)
+
+ SDLoc dl(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
+
+ // The SMAX patch (https://reviews.llvm.org/D47332) hasn't landed yet, so
+ // use the vmaxs* intrinsics for now; vmaxsw (v4i32) is the default.
+ // TODO: Use SMAX directly once the SMAX patch has landed.
+ Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
+ if (VT == MVT::v2i64)
+ BifID = Intrinsic::ppc_altivec_vmaxsd;
+ else if (VT == MVT::v8i16)
+ BifID = Intrinsic::ppc_altivec_vmaxsh;
+ else if (VT == MVT::v16i8)
+ BifID = Intrinsic::ppc_altivec_vmaxsb;
+
+ return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9624,6 +9638,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -12985,6 +13000,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ // Combine vmaxsw/h/b(a, a's negation) to abs(a).
+ // This exposes the vabsduw/h/b opportunity downstream.
+ if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
+ (IID == Intrinsic::ppc_altivec_vmaxsw ||
+ IID == Intrinsic::ppc_altivec_vmaxsh ||
+ IID == Intrinsic::ppc_altivec_vmaxsb)) {
+ SDValue V1 = N->getOperand(1);
+ SDValue V2 = N->getOperand(2);
+ if ((V1.getSimpleValueType() == MVT::v4i32 ||
+ V1.getSimpleValueType() == MVT::v8i16 ||
+ V1.getSimpleValueType() == MVT::v16i8) &&
+ V1.getSimpleValueType() == V2.getSimpleValueType()) {
+ // (0-a, a)
+ if (V1.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
+ V1.getOperand(1) == V2) {
+ return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
+ }
+ // (a, 0-a)
+ if (V2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
+ V2.getOperand(1) == V1) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ // (x-y, y-x)
+ if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
+ V1.getOperand(0) == V2.getOperand(1) &&
+ V1.getOperand(1) == V2.getOperand(0)) {
+ return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
+ }
+ }
+ }
}
break;
@@ -13217,6 +13265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
+ case ISD::ABS:
+ return combineABS(N, DCI);
}
return SDValue();
@@ -14503,3 +14553,47 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
// For non-constant masks, we can always use the record-form and.
return true;
}
+
+// Combine (abs (sub x, y)) into PPCISD::VABSD, where x and y are the sub's
+// operands and are passed through unchanged:
+// - if both x and y are ZERO_EXTEND or ZERO_EXTEND_VECTOR_INREG nodes:
+// (vabsd x y 0)
+// - otherwise, for a v4i32 sub that has a single use: (vabsd x y 1)
+SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
+ assert(Subtarget.hasP9Altivec() &&
+ "Only combine this when P9 altivec supported!");
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ if (N->getOperand(0).getOpcode() == ISD::SUB) {
+ // Zero-extended inputs are known to be non-negative even when treated as
+ // signed integers.
+ unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
+ unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
+ if ((SubOpcd0 == ISD::ZERO_EXTEND ||
+ SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ (SubOpcd1 == ISD::ZERO_EXTEND ||
+ SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+
+ // For type v4i32, it can be optimized with xvnegsp + vabsduw
+ if (N->getOperand(0).getValueType() == MVT::v4i32 &&
+ N->getOperand(0).hasOneUse()) {
+ return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0)->getOperand(0),
+ N->getOperand(0)->getOperand(1),
+ DAG.getTargetConstant(1, dl, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
OpenPOWER on IntegriCloud