summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorUlrich Weigand <ulrich.weigand@de.ibm.com>2015-05-05 19:26:48 +0000
committerUlrich Weigand <ulrich.weigand@de.ibm.com>2015-05-05 19:26:48 +0000
commitcd808237b24c7d6d0bb7ddf577dba37c31a06a50 (patch)
tree3e15e263edd31135f4279f6cb05b48dac89ad513 /llvm/lib/Target
parentce4c10958502b8f852dd88496272d262345a2513 (diff)
downloadbcm5719-llvm-cd808237b24c7d6d0bb7ddf577dba37c31a06a50.tar.gz
bcm5719-llvm-cd808237b24c7d6d0bb7ddf577dba37c31a06a50.zip
[SystemZ] Add CodeGen support for v2f64
This adds ABI and CodeGen support for the v2f64 type, which is natively supported by z13 instructions. Based on a patch by Richard Sandiford. llvm-svn: 236522
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp228
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h9
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrVector.td108
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td14
-rw-r--r--llvm/lib/Target/SystemZ/SystemZPatterns.td14
6 files changed, 342 insertions, 37 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index f5eb32c0a60..360d348af3a 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -44,7 +44,7 @@ def RetCC_SystemZ : CallingConv<[
// Similarly for vectors, with V24 being the ABI-compliant choice.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
// ABI-compliant code returns long double by reference, but that conversion
@@ -76,13 +76,13 @@ def CC_SystemZ : CallingConv<[
// The first 8 named vector arguments are passed in V24-V31.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
V25, V27, V29, V31]>>>>,
// Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v2f64],
CCAssignToStack<16, 8>>>,
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ddcb792ee09..5f547439c9a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -101,6 +101,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
}
// Compute derived properties from the register classes
@@ -327,6 +328,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
}
}
+ if (Subtarget.hasVector()) {
+ // There should be no need to check for float types other than v2f64
+ // since <2 x f32> isn't a legal type.
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ }
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -352,6 +362,33 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
}
}
+ // Handle floating-point vector types.
+ if (Subtarget.hasVector()) {
+ // Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ // Some insertions and extractions can be done directly but others
+ // need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ // These operations have direct equivalents.
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ }
+
// We have fused multiply-addition for f32 and f64 but not f128.
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
@@ -818,6 +855,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
+ case MVT::v2f64:
RC = &SystemZ::VR128BitRegClass;
break;
}
@@ -1894,18 +1932,25 @@ static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
return Result;
}
-// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
-// be done directly.
-static unsigned getVectorComparison(ISD::CondCode CC) {
+// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
+// be done directly. IsFP is true if CC is for a floating-point rather than
+// integer comparison.
+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
switch (CC) {
+ case ISD::SETOEQ:
case ISD::SETEQ:
- return SystemZISD::VICMPE;
+ return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ return IsFP ? SystemZISD::VFCMPHE : 0;
+
+ case ISD::SETOGT:
case ISD::SETGT:
- return SystemZISD::VICMPH;
+ return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
case ISD::SETUGT:
- return SystemZISD::VICMPHL;
+ return IsFP ? 0 : SystemZISD::VICMPHL;
default:
return 0;
@@ -1914,15 +1959,17 @@ static unsigned getVectorComparison(ISD::CondCode CC) {
// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly. Indicate in Invert whether the
-// result is for the inverse of CC.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) {
- if (unsigned Opcode = getVectorComparison(CC)) {
+// result is for the inverse of CC. IsFP is true if CC is for a
+// floating-point rather than integer comparison.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+ bool &Invert) {
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
Invert = false;
return Opcode;
}
- CC = ISD::getSetCCInverse(CC, true);
- if (unsigned Opcode = getVectorComparison(CC)) {
+ CC = ISD::getSetCCInverse(CC, !IsFP);
+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
Invert = true;
return Opcode;
}
@@ -1935,18 +1982,46 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool &Invert) {
static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
ISD::CondCode CC, SDValue CmpOp0,
SDValue CmpOp1) {
+ bool IsFP = CmpOp0.getValueType().isFloatingPoint();
bool Invert = false;
SDValue Cmp;
- // It doesn't really matter whether we try the inversion or the swap first,
- // since there are no cases where both work.
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert))
- Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
- else {
- CC = ISD::getSetCCSwappedOperands(CC);
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, Invert))
- Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0);
- else
- llvm_unreachable("Unhandled comparison");
+ switch (CC) {
+ // Handle tests for order using (or (ogt y x) (oge x y)).
+ case ISD::SETUO:
+ Invert = true;
+ case ISD::SETO: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GE = DAG.getNode(SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ break;
+ }
+
+ // Handle <> tests using (or (ogt y x) (ogt x y)).
+ case ISD::SETUEQ:
+ Invert = true;
+ case ISD::SETONE: {
+ assert(IsFP && "Unexpected integer comparison");
+ SDValue LT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
+ SDValue GT = DAG.getNode(SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ break;
+ }
+
+ // Otherwise a single comparison is enough. It doesn't really
+ // matter whether we try the inversion or the swap first, since
+ // there are no cases where both work.
+ default:
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
+ else {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
+ Cmp = DAG.getNode(Opcode, DL, VT, CmpOp1, CmpOp0);
+ else
+ llvm_unreachable("Unhandled comparison");
+ }
+ break;
}
if (Invert) {
SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
@@ -3326,6 +3401,46 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
+// Return true if BUILD_VECTOR node Op defines at most its first element,
+// i.e. every operand after operand 0 is UNDEF.
+static bool isScalarToVector(SDValue Op) {
+  const unsigned NumOps = Op.getNumOperands();
+  unsigned Idx = 1;
+  while (Idx != NumOps) {
+    // Any defined trailing element means this is a genuine vector build.
+    if (Op.getOperand(Idx).getOpcode() != ISD::UNDEF)
+      return false;
+    ++Idx;
+  }
+  return true;
+}
+
+// Build a vector of type VT whose first element holds Value.  The contents
+// of the remaining elements are unspecified.
+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
+                                   SDValue Value) {
+  switch (Value.getOpcode()) {
+  case ISD::Constant:
+  case ISD::ConstantFP: {
+    // Splat a constant across every element and leave the rest to the
+    // BUILD_VECTOR lowering.
+    SmallVector<SDValue, 16> Splat(VT.getVectorNumElements(), Value);
+    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Splat);
+  }
+  case ISD::UNDEF:
+    // An undefined scalar yields an entirely undefined vector.
+    return DAG.getUNDEF(VT);
+  default:
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+  }
+}
+
+// Build a two-element vector of type VT with Op0 in element 0 and Op1 in
+// element 1.  Intended for cases in which replication is cheap: when one
+// operand is UNDEF the other is simply replicated.
+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
+                                 SDValue Op0, SDValue Op1) {
+  const bool Undef0 = Op0.getOpcode() == ISD::UNDEF;
+  const bool Undef1 = Op1.getOpcode() == ISD::UNDEF;
+
+  // Nothing is defined, so neither is the result.
+  if (Undef0 && Undef1)
+    return DAG.getUNDEF(VT);
+
+  // Exactly one operand is defined: replicate it into both elements.
+  if (Undef0 || Undef1)
+    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Undef0 ? Op1 : Op0);
+
+  // Both are defined: place each in element 0 of its own vector and merge
+  // the high elements together.
+  SDValue Vec0 = buildScalarToVector(DAG, DL, VT, Op0);
+  SDValue Vec1 = buildScalarToVector(DAG, DL, VT, Op1);
+  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, Vec0, Vec1);
+}
+
// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// vector for them.
static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
@@ -3502,6 +3617,10 @@ static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
if (VT == MVT::v2i64)
return joinDwords(DAG, DL, Elems[0], Elems[1]);
+ // Use a 64-bit merge high to combine two doubles.
+ if (VT == MVT::v2f64)
+ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
+
// Collect the constant terms.
SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
@@ -3614,6 +3733,10 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (Res.getNode())
return Res;
+ // Detect SCALAR_TO_VECTOR conversions.
+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
+ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
+
// Otherwise use buildVector to build the vector up from GPRs.
unsigned NumElements = Op.getNumOperands();
SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
@@ -3664,6 +3787,62 @@ SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}
+// Custom-lower an INSERT_VECTOR_ELT of a floating-point element.  Op's
+// operands are the input vector, the scalar to insert and the element index.
+// Returns Op itself when the insertion can be matched directly, otherwise an
+// equivalent insertion performed on the integer form of the vector.
+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  // Handle insertions of floating-point values.
+  SDLoc DL(Op);
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+  EVT VT = Op.getValueType();
+
+  // Insertions into constant indices can be done using VPDI.  However,
+  // if the inserted value is a bitcast or a constant then it's better
+  // to use GPRs, as below.
+  if (Op1.getOpcode() != ISD::BITCAST &&
+      Op1.getOpcode() != ISD::ConstantFP &&
+      Op2.getOpcode() == ISD::Constant) {
+    // The opcode check above guarantees a ConstantSDNode, so use the
+    // asserting cast<> rather than dereferencing an unchecked dyn_cast<>.
+    uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
+    unsigned Mask = VT.getVectorNumElements() - 1;
+    if (Index <= Mask)
+      return Op;
+  }
+
+  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
+  MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
+  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
+                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
+                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
+  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
+}
+
+// Custom-lower an EXTRACT_VECTOR_ELT that produces a floating-point value.
+// Op's operands are the source vector and the element index.  Returns Op
+// itself when the index is an in-range constant, otherwise an equivalent
+// extraction performed on the integer form of the vector.
+SDValue
+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Vec = Op.getOperand(0);
+  SDValue Idx = Op.getOperand(1);
+  EVT EltVT = Op.getValueType();
+  EVT VecVT = Vec.getValueType();
+  unsigned NumElts = VecVT.getVectorNumElements();
+
+  // An in-range constant index can be extracted directly.
+  if (auto *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    unsigned LastIndex = NumElts - 1;
+    if (ConstIdx->getZExtValue() <= LastIndex)
+      return Op;
+  }
+
+  // Everything else goes through a GPR: reinterpret the vector as integers
+  // of the same element width, extract the integer, then reinterpret that
+  // as the floating-point result type.
+  MVT IntVT = MVT::getIntegerVT(EltVT.getSizeInBits());
+  MVT IntVecVT = MVT::getVectorVT(IntVT, NumElts);
+  SDValue IntVec = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Vec);
+  SDValue IntElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, IntVec, Idx);
+  return DAG.getNode(ISD::BITCAST, DL, EltVT, IntElt);
+}
+
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -3808,6 +3987,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return lowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
@@ -3879,6 +4062,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VICMPE);
OPCODE(VICMPH);
OPCODE(VICMPHL);
+ OPCODE(VFCMPE);
+ OPCODE(VFCMPH);
+ OPCODE(VFCMPHE);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4b7d5908946..8319c01fc5e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -219,6 +219,13 @@ enum {
VICMPH,
VICMPHL,
+ // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
+ // greater than" and VFCMPHE for "ordered and greater than or equal to".
+ VFCMPE,
+ VFCMPH,
+ VFCMPHE,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -400,6 +407,8 @@ private:
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index d94725b7913..546974aa5d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -118,18 +118,24 @@ let Predicates = [FeatureVector] in {
def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
+ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
+ (VLREPG bdxaddr12only:$addr)>;
// Load logical element and zero.
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
+ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
+ (VLLEZG bdxaddr12only:$addr)>;
// Load element.
def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>;
def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
+ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
+ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
// Gather element.
def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
@@ -152,6 +158,7 @@ defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
+defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
//===----------------------------------------------------------------------===//
// Stores
@@ -172,6 +179,9 @@ let Predicates = [FeatureVector] in {
def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
+ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
+ imm32zx1:$index),
+ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
// Scatter element.
def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
@@ -188,12 +198,14 @@ let Predicates = [FeatureVector] in {
def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
// Merge low.
def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
+ def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
// Permute.
def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
@@ -206,6 +218,8 @@ let Predicates = [FeatureVector] in {
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
+ (VREPG VR128:$vec, imm32zx16:$index)>;
// Select.
def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
@@ -287,6 +301,7 @@ defm : GenericVectorOps<v16i8, v16i8>;
defm : GenericVectorOps<v8i16, v8i16>;
defm : GenericVectorOps<v4i32, v4i32>;
defm : GenericVectorOps<v2i64, v2i64>;
+defm : GenericVectorOps<v2f64, v2i64>;
//===----------------------------------------------------------------------===//
// Integer arithmetic
@@ -734,34 +749,52 @@ let Predicates = [FeatureVector] in {
// Floating-point arithmetic
//===----------------------------------------------------------------------===//
+// Match each FP rounding node to INSN with the suppression flag and rounding
+// mode it needs; see SystemZInstrFP.td for the meaning of those values.
+multiclass VectorRounding<Instruction insn, TypedReg tr> {
+ def : FPConversion<insn, frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, frnd, tr, tr, 4, 1>;
+}
+
let Predicates = [FeatureVector] in {
// Add.
- def VFADB : BinaryVRRc<"vfadb", 0xE7E3, null_frag, v128db, v128db, 3, 0>;
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, null_frag, v64db, v64db, 3, 8>;
// Convert from fixed 64-bit.
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
// Convert from logical 64-bit.
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
// Convert to fixed 64-bit.
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
// Convert to logical 64-bit.
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
// Divide.
- def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, null_frag, v128db, v128db, 3, 0>;
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, null_frag, v64db, v64db, 3, 8>;
// Load FP integer.
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ defm : VectorRounding<VFIDB, v128db>;
// Load lengthened.
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, null_frag, v128db, v128eb, 2, 0>;
@@ -772,35 +805,35 @@ let Predicates = [FeatureVector] in {
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
// Multiply.
- def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, null_frag, v128db, v128db, 3, 0>;
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, null_frag, v64db, v64db, 3, 8>;
// Multiply and add.
- def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, null_frag, v128db, v128db, 0, 3>;
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, null_frag, v64db, v64db, 8, 3>;
// Multiply and subtract.
- def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, null_frag, v128db, v128db, 0, 3>;
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, null_frag, v64db, v64db, 8, 3>;
// Load complement.
- def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 0>;
+ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 0>;
// Load negative.
- def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 1>;
+ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 1>;
// Load positive.
- def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, null_frag, v128db, v128db, 3, 0, 2>;
+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, null_frag, v64db, v64db, 3, 8, 2>;
// Square root.
- def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, null_frag, v128db, v128db, 3, 0>;
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, null_frag, v64db, v64db, 3, 8>;
// Subtract.
- def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, null_frag, v128db, v128db, 3, 0>;
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, null_frag, v64db, v64db, 3, 8>;
// Test data class immediate.
@@ -824,19 +857,19 @@ let Predicates = [FeatureVector] in {
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
// Compare equal.
- defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, null_frag, null_frag,
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, null_frag,
v128g, v128db, 3, 0>;
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 8>;
// Compare high.
- defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, null_frag, null_frag,
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, null_frag,
v128g, v128db, 3, 0>;
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 8>;
// Compare high or equal.
- defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, null_frag, null_frag,
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, null_frag,
v128g, v128db, 3, 0>;
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 8>;
@@ -849,18 +882,27 @@ let Predicates = [FeatureVector] in {
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
//===----------------------------------------------------------------------===//
// Replicating scalars
@@ -881,6 +923,46 @@ def : Pat<(v2i64 (z_replicate GR64:$scalar)),
(VLVGP GR64:$scalar, GR64:$scalar)>;
//===----------------------------------------------------------------------===//
+// Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+// A scalar FP value of class CLS lives in element 0 (SUBREG) of a vector
+// register, so scalar_to_vector is just an INSERT_SUBREG into an undefined VT
+// vector, and z_replicate applies VREP with index 0 to that same vector.
+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
+ SubRegIndex subreg> {
+ def : Pat<(vt (scalar_to_vector cls:$scalar)),
+ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
+ def : Pat<(vt (z_replicate cls:$scalar)),
+ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
+ subreg), 0)>;
+}
+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
+
+// Match v2f64 insertions. The AddedComplexity counters the 3 added by
+// TableGen for the base register operand in VLVG-based integer insertions
+// and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
+ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), VR128:$vec, 1)>;
+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
+ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
+ subreg_r64), 0)>;
+}
+
+// We extract f64 element X by replicating (for elements other than 0)
+// and then taking a high subreg. The AddedComplexity counters the 3
+// added by TableGen for the base register operand in VLGV-based integer
+// extractions and ensures that this version is strictly better.
+let AddedComplexity = 4 in {
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
+ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
+}
+
+//===----------------------------------------------------------------------===//
// String instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 2e431859a86..7cf7d862ffe 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -200,6 +200,9 @@ def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
+def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
: SDNode<"SystemZISD::"##name, profile,
@@ -468,6 +471,10 @@ def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(add (mul node:$src1, node:$src2), node:$src3)>;
+// Fused multiply-subtract in the natural operand order: (src1 * src2) - src3.
+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, (fneg node:$src3))>;
+
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -501,6 +508,7 @@ def z_replicate_loadi8 : z_replicate_load<i32, anyextloadi8>;
def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
+def z_replicate_loadf64 : z_replicate_load<f64, load>;
// Load a scalar and insert it into a single element of a vector.
class z_vle<ValueType scalartype, SDPatternOperator load>
@@ -511,6 +519,7 @@ def z_vlei8 : z_vle<i32, anyextloadi8>;
def z_vlei16 : z_vle<i32, anyextloadi16>;
def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
+def z_vlef64 : z_vle<f64, load>;
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.
@@ -523,6 +532,10 @@ def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
def z_vllezi64 : PatFrag<(ops node:$addr),
(z_join_dwords (i64 (load node:$addr)), (i64 0))>;
+def z_vllezf64 : PatFrag<(ops node:$addr),
+ (z_merge_high
+ (scalar_to_vector (f64 (load node:$addr))),
+ (z_vzero))>;
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -533,6 +546,7 @@ def z_vstei8 : z_vste<i32, truncstorei8>;
def z_vstei16 : z_vste<i32, truncstorei16>;
def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
+def z_vstef64 : z_vste<f64, store>;
// Arithmetic negation on vectors.
def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index e307f8a888e..16a7ed784d7 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
// The sign of the zero makes no difference.
def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
}
+
+// Use INSN for performing binary operation OPERATOR of type VT
+// on registers of class CLS.
+class BinaryRRWithType<Instruction insn, RegisterOperand cls,
+ SDPatternOperator operator, ValueType vt>
+ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
+
+// Use INSN to perform conversion OPERATOR from a TR2 input to a TR1 output.
+// SUPPRESS (4 = suppress inexact conditions, 0 = allow them) and the rounding
+// mode MODE are passed to INSN as its trailing immediate operands.
+class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
+ TypedReg tr2, bits<3> suppress, bits<4> mode>
+ : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
+ (insn tr2.op:$vec, suppress, mode)>;
OpenPOWER on IntegriCloud