summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 68
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.h 4
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZInstrVector.td 2
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZOperators.td 5
-rw-r--r-- llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll 61
-rw-r--r-- llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll 21
6 files changed, 136 insertions, 25 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 14e15bad933..c73905d3357 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -637,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
@@ -5386,6 +5388,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VEXTEND);
OPCODE(STRICT_VEXTEND);
OPCODE(VROUND);
+ OPCODE(STRICT_VROUND);
OPCODE(VTM);
OPCODE(VFAE_CC);
OPCODE(VFAEZ_CC);
@@ -5908,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
return SDValue();
}
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
+ SDValue Chain1 = N1->getOperand(0); // Operand 0 of each node is read as its input chain.
+ SDValue Chain2 = N2->getOperand(0);
+ // Goal: find one input chain that a single node standing in for both N1 and N2 can use.
+ // Trivial case: both nodes take the same chain, so that chain is the result.
+ if (Chain1 == Chain2)
+ return Chain1;
+
+ // FIXME - we could handle more complex cases via TokenFactor,
+ // assuming we can verify that this would not create a cycle.
+ return SDValue(); // Empty value: the chains differ, so no merged chain is available.
+}
+}
+
SDValue SystemZTargetLowering::combineFP_ROUND(
SDNode *N, DAGCombinerInfo &DCI) const {
@@ -5920,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
// (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f32 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5937,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
SDValue OtherRound = SDValue(*U->use_begin(), 0);
- if (OtherRound.getOpcode() == ISD::FP_ROUND &&
- OtherRound.getOperand(0) == SDValue(U, 0) &&
+ if (OtherRound.getOpcode() == N->getOpcode() &&
+ OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
OtherRound.getValueType() == MVT::f32) {
- SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
- MVT::v4f32, Vec);
+ SDValue VRound, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherRound.getNode());
+ if (!Chain)
+ continue;
+ VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+ {MVT::v4f32, MVT::Other}, {Chain, Vec});
+ Chain = VRound.getValue(1);
+ } else
+ VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+ MVT::v4f32, Vec);
DCI.AddToWorklist(VRound.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -5971,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
// (extract_vector_elt (VEXTEND X) 1)
//
// This is a special case since the target doesn't really support v2f32s.
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0);
+ SDValue Op0 = N->getOperand(OpNo);
if (N->getValueType(0) == MVT::f64 &&
Op0.hasOneUse() &&
Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5988,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
U->getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
SDValue OtherExtend = SDValue(*U->use_begin(), 0);
- if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
- OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ if (OtherExtend.getOpcode() == N->getOpcode() &&
+ OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
OtherExtend.getValueType() == MVT::f64) {
- SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
- MVT::v2f64, Vec);
+ SDValue VExtend, Chain;
+ if (N->isStrictFPOpcode()) {
+ Chain = MergeInputChains(N, OtherExtend.getNode());
+ if (!Chain)
+ continue;
+ VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+ {MVT::v2f64, MVT::Other}, {Chain, Vec});
+ Chain = VExtend.getValue(1);
+ } else
+ VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
DCI.AddToWorklist(VExtend.getNode());
SDValue Extract1 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
DCI.AddToWorklist(Extract1.getNode());
DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ if (Chain)
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
SDValue Extract0 =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ if (Chain)
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+ N->getVTList(), Extract0, Chain);
return Extract0;
}
}
@@ -6341,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index e49c47e379e..0ac07a12ab7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -267,8 +267,8 @@ enum NodeType : unsigned {
VEXTEND, STRICT_VEXTEND,
// Round the f64 elements of vector operand 0 to f32s and store them in the
- // even elements of the result.
- VROUND,
+ // even elements of the result. Regular and strict versions.
+ VROUND, STRICT_VROUND,
// AND the two vector operands together and set CC based on the result.
VTM,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index de6e473dd56..c945122ee57 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in {
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
- def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+ def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 0beefc4682a..a6a72903e57 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -353,6 +353,8 @@ def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", // same type profile as z_vround, plus a chain
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
@@ -741,6 +743,9 @@ def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
def z_any_vextend : PatFrags<(ops node:$src),
[(z_strict_vextend node:$src),
(z_vextend node:$src)]>;
+def z_any_vround : PatFrags<(ops node:$src), // matches either alternative listed below
+ [(z_strict_vround node:$src),
+ (z_vround node:$src)]>;
// Create a unary operator that loads from memory and then performs
// the given operation on it.
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll
new file mode 100644
index 00000000000..d4590a57d3e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll
@@ -0,0 +1,61 @@
+; Test conversions between different-sized float elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+
+; Test cases where both elements of a v2f64 are converted to f32s (strict fptrunc, expects one vledb).
+define void @f1(<2 x double> %val, <2 x float> *%ptr) #0 {
+; CHECK-LABEL: f1:
+; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0
+; CHECK: br %r14
+ %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store <2 x float> %res, <2 x float> *%ptr
+ ret void
+}
+
+; Test strict conversion of element 0 of a v2f64 in a vector register to an f32 (expects wledb).
+define float @f2(<2 x double> %vec) #0 {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24, 0, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %vec, i32 0
+ %ret = call float @llvm.experimental.constrained.fptrunc.f32.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret float %ret
+}
+
+; Test cases where even elements of a v4f32 are converted to f64s (strict fpext, expects one vldeb).
+define <2 x double> @f3(<4 x float> %vec) #0 {
+; CHECK-LABEL: f3:
+; CHECK: vldeb %v24, {{%v[0-9]+}}
+; CHECK: br %r14
+ %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 0, i32 2>
+ %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+ <2 x float> %shuffle,
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %res
+}
+
+; Test strict conversion of element 0 of a v4f32 in a vector register to an f64 (expects wldeb).
+define double @f4(<4 x float> %vec) #0 {
+; CHECK-LABEL: f4:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %vec, i32 0
+ %ret = call double @llvm.experimental.constrained.fpext.f64.f32(
+ float %scalar,
+ metadata !"fpexcept.strict") #0
+ ret double %ret
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index e7c4e3a4466..348be4a9f14 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -5417,13 +5417,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v1, 0(%r2), 4
-; SZ13-NEXT: ledbra %f2, 0, %f1, 0
-; SZ13-NEXT: vrepg %v1, %v1, 1
+; SZ13-NEXT: vledb %v1, %v1, 0, 0
+; SZ13-NEXT: larl %r1, .LCPI97_0
; SZ13-NEXT: ld %f0, 16(%r2)
-; SZ13-NEXT: ledbra %f1, 0, %f1, 0
+; SZ13-NEXT: vl %v2, 0(%r1), 3
+; SZ13-NEXT: vperm %v1, %v1, %v0, %v2
; SZ13-NEXT: ledbra %f0, 0, %f0, 0
-; SZ13-NEXT: vmrhf %v1, %v2, %v1
-; SZ13-NEXT: vmrhg %v1, %v1, %v1
; SZ13-NEXT: ste %f0, 8(%r3)
; SZ13-NEXT: vsteg %v1, 0(%r3), 0
; SZ13-NEXT: br %r14
@@ -5544,13 +5543,11 @@ define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %de
; SZ13-LABEL: constrained_vector_fpext_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v0, 0(%r2), 4
-; SZ13-NEXT: vrepf %v2, %v0, 1
-; SZ13-NEXT: ldebr %f1, %f0
-; SZ13-NEXT: ldebr %f2, %f2
-; SZ13-NEXT: vrepf %v0, %v0, 2
-; SZ13-NEXT: ldebr %f0, %f0
-; SZ13-NEXT: vmrhg %v1, %v1, %v2
-; SZ13-NEXT: std %f0, 16(%r3)
+; SZ13-NEXT: vrepf %v1, %v0, 1
+; SZ13-NEXT: vldeb %v0, %v0
+; SZ13-NEXT: ldebr %f1, %f1
+; SZ13-NEXT: vmrhg %v1, %v0, %v1
+; SZ13-NEXT: vsteg %v0, 16(%r3), 1
; SZ13-NEXT: vst %v1, 0(%r3), 4
; SZ13-NEXT: br %r14
entry:
OpenPOWER on IntegriCloud