6 files changed, 136 insertions, 25 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 14e15bad933..c73905d3357 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -637,7 +637,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::STRICT_FP_ROUND);
   setTargetDAGCombine(ISD::FP_EXTEND);
+  setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
   setTargetDAGCombine(ISD::BSWAP);
   setTargetDAGCombine(ISD::SDIV);
   setTargetDAGCombine(ISD::UDIV);
@@ -5386,6 +5388,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
     OPCODE(VEXTEND);
     OPCODE(STRICT_VEXTEND);
     OPCODE(VROUND);
+    OPCODE(STRICT_VROUND);
     OPCODE(VTM);
     OPCODE(VFAE_CC);
     OPCODE(VFAEZ_CC);
@@ -5908,6 +5911,19 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
   return SDValue();
 }
 
+static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
+  SDValue Chain1 = N1->getOperand(0);
+  SDValue Chain2 = N2->getOperand(0);
+
+  // Trivial case: both nodes take the same chain.
+  if (Chain1 == Chain2)
+    return Chain1;
+
+  // FIXME - we could handle more complex cases via TokenFactor,
+  // assuming we can verify that this would not create a cycle.
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::combineFP_ROUND(
     SDNode *N, DAGCombinerInfo &DCI) const {
 
@@ -5920,8 +5936,9 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
   // (extract_vector_elt (VROUND X) 2)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f32 &&
       Op0.hasOneUse() &&
       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5937,20 +5954,34 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
         SDValue OtherRound = SDValue(*U->use_begin(), 0);
-        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-            OtherRound.getOperand(0) == SDValue(U, 0) &&
+        if (OtherRound.getOpcode() == N->getOpcode() &&
+            OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
             OtherRound.getValueType() == MVT::f32) {
-          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-                                       MVT::v4f32, Vec);
+          SDValue VRound, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherRound.getNode());
+            if (!Chain)
+              continue;
+            VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
+                                 {MVT::v4f32, MVT::Other}, {Chain, Vec});
+            Chain = VRound.getValue(1);
+          } else
+            VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
+                                 MVT::v4f32, Vec);
           DCI.AddToWorklist(VRound.getNode());
           SDValue Extract1 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                         VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
           DCI.AddToWorklist(Extract1.getNode());
           DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
           SDValue Extract0 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                         VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
           return Extract0;
         }
       }
@@ -5971,8 +6002,9 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
   // (extract_vector_elt (VEXTEND X) 1)
   //
   // This is a special case since the target doesn't really support v2f32s.
+  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
   SelectionDAG &DAG = DCI.DAG;
-  SDValue Op0 = N->getOperand(0);
+  SDValue Op0 = N->getOperand(OpNo);
   if (N->getValueType(0) == MVT::f64 &&
       Op0.hasOneUse() &&
       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -5988,20 +6020,34 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
           U->getOperand(1).getOpcode() == ISD::Constant &&
           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
         SDValue OtherExtend = SDValue(*U->use_begin(), 0);
-        if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
-            OtherExtend.getOperand(0) == SDValue(U, 0) &&
+        if (OtherExtend.getOpcode() == N->getOpcode() &&
+            OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
             OtherExtend.getValueType() == MVT::f64) {
-          SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
-                                        MVT::v2f64, Vec);
+          SDValue VExtend, Chain;
+          if (N->isStrictFPOpcode()) {
+            Chain = MergeInputChains(N, OtherExtend.getNode());
+            if (!Chain)
+              continue;
+            VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
+                                  {MVT::v2f64, MVT::Other}, {Chain, Vec});
+            Chain = VExtend.getValue(1);
+          } else
+            VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+                                  MVT::v2f64, Vec);
           DCI.AddToWorklist(VExtend.getNode());
           SDValue Extract1 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                         VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
           DCI.AddToWorklist(Extract1.getNode());
           DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+          if (Chain)
+            DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
           SDValue Extract0 =
             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                         VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+          if (Chain)
+            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
+                               N->getVTList(), Extract0, Chain);
           return Extract0;
         }
       }
@@ -6341,7 +6387,9 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
+  case ISD::STRICT_FP_ROUND:
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
+  case ISD::STRICT_FP_EXTEND:
   case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index e49c47e379e..0ac07a12ab7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -267,8 +267,8 @@ enum NodeType : unsigned {
   VEXTEND, STRICT_VEXTEND,
 
   // Round the f64 elements of vector operand 0 to f32s and store them in the
-  // even elements of the result.
-  VROUND,
+  // even elements of the result.  Regular and strict versions.
+  VROUND, STRICT_VROUND,
 
   // AND the two vector operands together and set CC based on the result.
   VTM,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index de6e473dd56..c945122ee57 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1156,7 +1156,7 @@ let Predicates = [FeatureVector] in {
     def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
     def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
   }
-  def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
+  def : Pat<(v4f32 (z_any_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
   def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
   let Predicates = [FeatureVectorEnhancements1] in {
     let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 0beefc4682a..a6a72903e57 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -353,6 +353,8 @@ def z_vextend           : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
 def z_strict_vextend    : SDNode<"SystemZISD::STRICT_VEXTEND",
                                  SDT_ZVecUnaryConv, [SDNPHasChain]>;
 def z_vround            : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_strict_vround     : SDNode<"SystemZISD::STRICT_VROUND",
+                                 SDT_ZVecUnaryConv, [SDNPHasChain]>;
 def z_vtm               : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
 def z_vfae_cc           : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
 def z_vfaez_cc          : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>;
@@ -741,6 +743,9 @@ def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
 def z_any_vextend : PatFrags<(ops node:$src),
                              [(z_strict_vextend node:$src),
                               (z_vextend node:$src)]>;
+def z_any_vround  : PatFrags<(ops node:$src),
+                             [(z_strict_vround node:$src),
+                              (z_vround node:$src)]>;
 
 // Create a unary operator that loads from memory and then performs
 // the given operation on it.
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll
new file mode 100644
index 00000000000..d4590a57d3e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-conv-02.ll
@@ -0,0 +1,61 @@
+; Test conversions between different-sized float elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+
+; Test cases where both elements of a v2f64 are converted to f32s.
+define void @f1(<2 x double> %val, <2 x float> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0
+; CHECK: br %r14
+  %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                               <2 x double> %val,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  store <2 x float> %res, <2 x float> *%ptr
+  ret void
+}
+
+; Test conversion of an f64 in a vector register to an f32.
+define float @f2(<2 x double> %vec) #0 {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24, 0, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %vec, i32 0
+  %ret = call float @llvm.experimental.constrained.fptrunc.f32.f64(
+                                               double %scalar,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret float %ret
+}
+
+; Test cases where even elements of a v4f32 are converted to f64s.
+define <2 x double> @f3(<4 x float> %vec) {
+; CHECK-LABEL: f3:
+; CHECK: vldeb %v24, {{%v[0-9]+}}
+; CHECK: br %r14
+  %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 0, i32 2>
+  %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                               <2 x float> %shuffle,
+                                               metadata !"fpexcept.strict") #0
+  ret <2 x double> %res
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f4(<4 x float> %vec) {
+; CHECK-LABEL: f4:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %vec, i32 0
+  %ret = call double @llvm.experimental.constrained.fpext.f64.f32(
+                                               float %scalar,
+                                               metadata !"fpexcept.strict") #0
+  ret double %ret
+}
+
+attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index e7c4e3a4466..348be4a9f14 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -5417,13 +5417,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
 ; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
 ; SZ13:       # %bb.0: # %entry
 ; SZ13-NEXT:    vl %v1, 0(%r2), 4
-; SZ13-NEXT:    ledbra %f2, 0, %f1, 0
-; SZ13-NEXT:    vrepg %v1, %v1, 1
+; SZ13-NEXT:    vledb %v1, %v1, 0, 0
+; SZ13-NEXT:    larl %r1, .LCPI97_0
 ; SZ13-NEXT:    ld %f0, 16(%r2)
-; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    vl %v2, 0(%r1), 3
+; SZ13-NEXT:    vperm %v1, %v1, %v0, %v2
 ; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
-; SZ13-NEXT:    vmrhf %v1, %v2, %v1
-; SZ13-NEXT:    vmrhg %v1, %v1, %v1
 ; SZ13-NEXT:    ste %f0, 8(%r3)
 ; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
 ; SZ13-NEXT:    br %r14
@@ -5544,13 +5543,11 @@ define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %de
 ; SZ13-LABEL: constrained_vector_fpext_v3f64:
 ; SZ13:       # %bb.0: # %entry
 ; SZ13-NEXT:    vl %v0, 0(%r2), 4
-; SZ13-NEXT:    vrepf %v2, %v0, 1
-; SZ13-NEXT:    ldebr %f1, %f0
-; SZ13-NEXT:    ldebr %f2, %f2
-; SZ13-NEXT:    vrepf %v0, %v0, 2
-; SZ13-NEXT:    ldebr %f0, %f0
-; SZ13-NEXT:    vmrhg %v1, %v1, %v2
-; SZ13-NEXT:    std %f0, 16(%r3)
+; SZ13-NEXT:    vrepf %v1, %v0, 1
+; SZ13-NEXT:    vldeb %v0, %v0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v1, %v0, %v1
+; SZ13-NEXT:    vsteg %v0, 16(%r3), 1
 ; SZ13-NEXT:    vst %v1, 0(%r3), 4
 ; SZ13-NEXT:    br %r14
 entry: