diff options
| author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-12-20 15:31:16 +0100 |
|---|---|---|
| committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-12-20 15:36:56 +0100 |
| commit | ede8293d7d9d4623be5a911cc076c1dfd7810b8c (patch) | |
| tree | bd205ca6e7084818b4896cbdab2e0ffe1838032a /llvm/lib/Target/SystemZ | |
| parent | 6cba90dc4de6427817bad763f018a502a9048f74 (diff) | |
| download | bcm5719-llvm-ede8293d7d9d4623be5a911cc076c1dfd7810b8c.tar.gz bcm5719-llvm-ede8293d7d9d4623be5a911cc076c1dfd7810b8c.zip | |
[SystemZ][FPEnv] Enable strict vector FP extends/truncations
The back-end currently has special DAGCombine code to detect
cases where two floating-point extend or truncate operations
can be combined into a single vector operation.
This patch extends that support to also handle strict FP operations.
Note that currently only the case where both operations have the
same input chain is supported. This already suffices to cover
the common case where the operations result from scalarizing a
non-legal vector type. More general cases can be supported in
the future.
Diffstat (limited to 'llvm/lib/Target/SystemZ')
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 68 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZOperators.td | 5 |
4 files changed, 66 insertions, 13 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 14e15bad933..c73905d3357 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -637,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::STRICT_FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::STRICT_FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); @@ -5386,6 +5388,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VEXTEND); OPCODE(STRICT_VEXTEND); OPCODE(VROUND); + OPCODE(STRICT_VROUND); OPCODE(VTM); OPCODE(VFAE_CC); OPCODE(VFAEZ_CC); @@ -5908,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS( return SDValue(); } +static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { + SDValue Chain1 = N1->getOperand(0); + SDValue Chain2 = N2->getOperand(0); + + // Trivial case: both nodes take the same chain. + if (Chain1 == Chain2) + return Chain1; + + // FIXME - we could handle more complex cases via TokenFactor, + // assuming we can verify that this would not create a cycle. + return SDValue(); +} + SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { @@ -5920,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND( // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5937,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); - if (OtherRound.getOpcode() == ISD::FP_ROUND && - OtherRound.getOperand(0) == SDValue(U, 0) && + if (OtherRound.getOpcode() == N->getOpcode() && + OtherRound.getOperand(OpNo) == SDValue(U, 0) && OtherRound.getValueType() == MVT::f32) { - SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), - MVT::v4f32, Vec); + SDValue VRound, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherRound.getNode()); + if (!Chain) + continue; + VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), + {MVT::v4f32, MVT::Other}, {Chain, Vec}); + Chain = VRound.getValue(1); + } else + VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), + MVT::v4f32, Vec); DCI.AddToWorklist(VRound.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -5971,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( // (extract_vector_elt (VEXTEND X) 1) // // This is a special case since the target doesn't really support v2f32s. + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -5988,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( U->getOperand(1).getOpcode() == ISD::Constant && cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); - if (OtherExtend.getOpcode() == ISD::FP_EXTEND && - OtherExtend.getOperand(0) == SDValue(U, 0) && + if (OtherExtend.getOpcode() == N->getOpcode() && + OtherExtend.getOperand(OpNo) == SDValue(U, 0) && OtherExtend.getValueType() == MVT::f64) { - SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), - MVT::v2f64, Vec); + SDValue VExtend, Chain; + if (N->isStrictFPOpcode()) { + Chain = MergeInputChains(N, OtherExtend.getNode()); + if (!Chain) + continue; + VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), + {MVT::v2f64, MVT::Other}, {Chain, Vec}); + Chain = VExtend.getValue(1); + } else + VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), + MVT::v2f64, Vec); DCI.AddToWorklist(VExtend.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); + if (Chain) + DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); + if (Chain) + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), + N->getVTList(), Extract0, Chain); return Extract0; } } @@ -6341,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); 
+ case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index e49c47e379e..0ac07a12ab7 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -267,8 +267,8 @@ enum NodeType : unsigned { VEXTEND, STRICT_VEXTEND, // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. - VROUND, + // even elements of the result. Regular and strict versions. + VROUND, STRICT_VROUND, // AND the two vector operands together and set CC based on the result. VTM, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index de6e473dd56..c945122ee57 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in { def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>; def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>; } - def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; + def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>; let Predicates = [FeatureVectorEnhancements1] in { let Uses = [FPC], mayRaiseFPException = 1 in { diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 0beefc4682a..a6a72903e57 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -353,6 +353,8 @@ def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; def 
z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; +def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; @@ -741,6 +743,9 @@ def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs), def z_any_vextend : PatFrags<(ops node:$src), [(z_strict_vextend node:$src), (z_vextend node:$src)]>; +def z_any_vround : PatFrags<(ops node:$src), + [(z_strict_vround node:$src), + (z_vround node:$src)]>; // Create a unary operator that loads from memory and then performs // the given operation on it. |

