summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/SystemZ
diff options
context:
space:
mode:
authorUlrich Weigand <ulrich.weigand@de.ibm.com>2019-12-20 15:31:16 +0100
committerUlrich Weigand <ulrich.weigand@de.ibm.com>2019-12-20 15:36:56 +0100
commitede8293d7d9d4623be5a911cc076c1dfd7810b8c (patch)
treebd205ca6e7084818b4896cbdab2e0ffe1838032a /llvm/lib/Target/SystemZ
parent6cba90dc4de6427817bad763f018a502a9048f74 (diff)
downloadbcm5719-llvm-ede8293d7d9d4623be5a911cc076c1dfd7810b8c.tar.gz
bcm5719-llvm-ede8293d7d9d4623be5a911cc076c1dfd7810b8c.zip
[SystemZ][FPEnv] Enable strict vector FP extends/truncations
The back-end currently has special DAGCombine code to detect cases where two floating-point extend or truncate operations can be combined into a single vector operation. This patch extends that support to also handle strict FP operations. Note that currently only the case where both operations have the same input chain is supported. This already suffices to cover the common case where the operations result from scalarizing a non-legal vector type. More general cases can be supported in the future.
Diffstat (limited to 'llvm/lib/Target/SystemZ')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp68
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrVector.td2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td5
4 files changed, 66 insertions, 13 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 14e15bad933..c73905d3357 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -637,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
@@ -5386,6 +5388,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VEXTEND);
OPCODE(STRICT_VEXTEND);
OPCODE(VROUND);
+ OPCODE(STRICT_VROUND);
OPCODE(VTM);
OPCODE(VFAE_CC);
OPCODE(VFAEZ_CC);
@@ -5908,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
return SDValue();
}
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { // Chain for a node replacing N1 and N2, or empty.
+ SDValue Chain1 = N1->getOperand(0); // Operand 0 is read as N1's input chain.
+ SDValue Chain2 = N2->getOperand(0); // Likewise for N2.
+
+ // Trivial case: both nodes take the same chain, so it can be reused as is.
+ if (Chain1 == Chain2)
+ return Chain1;
+
+ // FIXME - we could handle more complex cases via TokenFactor,
+ // assuming we can verify that this would not create a cycle.
+ return SDValue(); // Empty value tells the caller no merged chain is available.
+}
+
SDValue SystemZTargetLowering::combineFP_ROUND(
SDNode *N, DAGCombinerInfo &DCI) const {
@@ -5920,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f32 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5937,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
SDValue OtherRound = SDValue(*U->use_begin(), 0);
- if (OtherRound.getOpcode() == ISD::FP_ROUND &&
- OtherRound.getOperand(0) == SDValue(U, 0) &&
+ if (OtherRound.getOpcode() == N->getOpcode() &&
+ OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
OtherRound.getValueType() == MVT::f32) {
- SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
- MVT::v4f32, Vec);
+ SDValue VRound, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherRound.getNode());
+ if (!Chain)
+ continue;
+ VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+ {MVT::v4f32, MVT::Other}, {Chain, Vec});
+ Chain = VRound.getValue(1);
+ } else
+ VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
DCI.AddToWorklist(VRound.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -5971,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
// (extract_vector_elt (VEXTEND X) 1)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f64 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5988,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
SDValue OtherExtend = SDValue(*U->use_begin(), 0);
- if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
- OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ if (OtherExtend.getOpcode() == N->getOpcode() &&
+ OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
OtherExtend.getValueType() == MVT::f64) {
- SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
- MVT::v2f64, Vec);
+ SDValue VExtend, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherExtend.getNode());
+ if (!Chain)
+ continue;
+ VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+ {MVT::v2f64, MVT::Other}, {Chain, Vec});
+ Chain = VExtend.getValue(1);
+ } else
+ VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
DCI.AddToWorklist(VExtend.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -6341,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index e49c47e379e..0ac07a12ab7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -267,8 +267,8 @@ enum NodeType : unsigned {
VEXTEND, STRICT_VEXTEND,
// Round the f64 elements of vector operand 0 to f32s and store them in the
- // even elements of the result.
- VROUND,
+ // even elements of the result. Regular and strict versions.
+ VROUND, STRICT_VROUND,
// AND the two vector operands together and set CC based on the result.
VTM,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index de6e473dd56..c945122ee57 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in {
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
- def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 0beefc4682a..a6a72903e57 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -353,6 +353,8 @@ def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND",
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
@@ -741,6 +743,9 @@ def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
def z_any_vextend : PatFrags<(ops node:$src),
[(z_strict_vextend node:$src),
(z_vextend node:$src)]>;
+def z_any_vround : PatFrags<(ops node:$src),
+ [(z_strict_vround node:$src),
+ (z_vround node:$src)]>;
// Create a unary operator that loads from memory and then performs
// the given operation on it.
OpenPOWER on IntegriCloud