summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/SystemZ
diff options
context:
space:
mode:
author: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2015-05-05 19:29:21 +0000
committer: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2015-05-05 19:29:21 +0000
commit: cd2a1b5341a3c42b1a56f8f301bd0de0343b5e8e (patch)
tree: 1a531a74e28c6c4ffbffa35ad6872e8cbe5f38e3 /llvm/lib/Target/SystemZ
parent: 49506d78e7f437785f4d3f68063f4aa9c622bb2c (diff)
download: bcm5719-llvm-cd2a1b5341a3c42b1a56f8f301bd0de0343b5e8e.tar.gz
bcm5719-llvm-cd2a1b5341a3c42b1a56f8f301bd0de0343b5e8e.zip
[SystemZ] Handle sub-128 vectors
The ABI allows sub-128 vectors to be passed and returned in registers, with the vector occupying the upper part of a register. We therefore want to legalize those types by widening the vector rather than promoting the elements. The patch includes some simple tests for sub-128 vectors and also tests that we can recognize various pack sequences, some of which use sub-128 vectors as temporary results. One of these forms is based on the pack sequences generated by llvmpipe when no intrinsics are used. Signed unpacks are recognized as BUILD_VECTORs whose elements are individually sign-extended. Unsigned unpacks can have the equivalent form with zero extension, but they also occur as shuffles in which some elements are zero. Based on a patch by Richard Sandiford. llvm-svn: 236525
Diffstat (limited to 'llvm/lib/Target/SystemZ')
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZCallingConv.h    | 17
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZCallingConv.td   | 17
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 81
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.h   | 28
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZInstrVector.td   | 24
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZOperators.td     | 15
6 files changed, 155 insertions, 27 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 8b8146762b6..bff0706618a 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -28,6 +28,14 @@ private:
/// See ISD::OutputArg::IsFixed.
SmallVector<bool, 4> ArgIsFixed;
+ /// Records whether the value was widened from a short vector type.
+ SmallVector<bool, 4> ArgIsShortVector;
+
+ // Check whether ArgVT is a short vector type.
+ bool IsShortVectorType(EVT ArgVT) {
+ return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
+ }
+
public:
SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
@@ -39,6 +47,10 @@ public:
ArgIsFixed.clear();
for (unsigned i = 0; i < Ins.size(); ++i)
ArgIsFixed.push_back(true);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Ins.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
CCState::AnalyzeFormalArguments(Ins, Fn);
}
@@ -49,6 +61,10 @@ public:
ArgIsFixed.clear();
for (unsigned i = 0; i < Outs.size(); ++i)
ArgIsFixed.push_back(Outs[i].IsFixed);
+ // Record whether the call operand was a short vector.
+ ArgIsShortVector.clear();
+ for (unsigned i = 0; i < Outs.size(); ++i)
+ ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
CCState::AnalyzeCallOperands(Outs, Fn);
}
@@ -60,6 +76,7 @@ public:
CCAssignFn Fn) = delete;
bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
+ bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index a2f996e60df..be8f00b57ad 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -21,6 +21,11 @@ class CCIfSubtarget<string F, CCAction A>
class CCIfFixed<CCAction A>
: CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
+// Match if this specific argument was widened from a short vector type.
+class CCIfShortVector<CCAction A>
+ : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
+
+
//===----------------------------------------------------------------------===//
// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
@@ -43,6 +48,8 @@ def RetCC_SystemZ : CallingConv<[
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
// Similarly for vectors, with V24 being the ABI-compliant choice.
+ // Sub-128 vectors are returned in the same way, but they're widened
+ // to one of these types during type legalization.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
@@ -74,12 +81,20 @@ def CC_SystemZ : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
- // The first 8 named vector arguments are passed in V24-V31.
+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+ // are passed in the same way, but they're widened to one of these types
+ // during type legalization.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
V25, V27, V29, V31]>>>>,
+ // However, sub-128 vectors which need to go on the stack occupy just a
+ // single 8-byte-aligned 8-byte stack slot. Pass as i64.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfShortVector<CCBitConvertToType<i64>>>>,
+
// Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ff79a48179f..c3842519008 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -318,6 +318,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// Convert a GPR scalar to a vector by inserting it into element 0.
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ // Use a series of unpacks for extensions.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
+
// Detect shifts by a scalar amount and convert them into
// V*_BY_SCALAR.
setOperationAction(ISD::SHL, VT, Custom);
@@ -793,7 +797,15 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
else if (VA.getLocInfo() == CCValAssign::Indirect)
Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
MachinePointerInfo(), false, false, false, 0);
- else
+ else if (VA.getLocInfo() == CCValAssign::BCvt) {
+ // If this is a short vector argument loaded from the stack,
+ // extend from i64 to full vector size and then bitcast.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
+ Value, DAG.getUNDEF(MVT::i64));
+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
+ } else
assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
return Value;
}
@@ -810,6 +822,14 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::BCvt:
+ // If this is a short vector argument to be stored to the stack,
+ // bitcast to v2i64 and then extract first element.
+ assert(VA.getLocVT() == MVT::i64);
+ assert(VA.getValVT().isVector());
+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
case CCValAssign::Full:
return Value;
default:
@@ -3910,6 +3930,23 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
+SDValue
+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const {
+ SDValue PackedOp = Op.getOperand(0);
+ EVT OutVT = Op.getValueType();
+ EVT InVT = PackedOp.getValueType();
+ unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
+ unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
+ do {
+ FromBits *= 2;
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
+ SystemZ::VectorBits / FromBits);
+ PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+ } while (FromBits != ToBits);
+ return PackedOp;
+}
+
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
unsigned ByScalar) const {
// Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -4058,6 +4095,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
case ISD::SHL:
return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
case ISD::SRL:
@@ -4122,6 +4163,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(PERMUTE_DWORDS);
OPCODE(PERMUTE);
OPCODE(PACK);
+ OPCODE(UNPACK_HIGH);
+ OPCODE(UNPACKL_HIGH);
+ OPCODE(UNPACK_LOW);
+ OPCODE(UNPACKL_LOW);
OPCODE(VSHL_BY_SCALAR);
OPCODE(VSRL_BY_SCALAR);
OPCODE(VSRA_BY_SCALAR);
@@ -4334,17 +4379,35 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
- // (z_merge_high 0, 0) -> 0. This is mostly useful for using VLLEZF
- // for v4f32.
- if (Opcode == SystemZISD::MERGE_HIGH) {
+ if (Opcode == SystemZISD::MERGE_HIGH ||
+ Opcode == SystemZISD::MERGE_LOW) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
- if (Op0 == Op1) {
- if (Op0.getOpcode() == ISD::BITCAST)
- Op0 = Op0.getOperand(0);
- if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
- cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0)
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
+ cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
+ // for v4f32.
+ if (Op1 == N->getOperand(0))
return Op1;
+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
+ EVT VT = Op1.getValueType();
+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
+ if (ElemBytes <= 4) {
+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
+ EVT InVT = VT.changeVectorElementTypeToInteger();
+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
+ SystemZ::VectorBytes / ElemBytes / 2);
+ if (VT != InVT) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
+ DCI.AddToWorklist(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ }
}
}
// If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 24a3f4bb5d4..7a3b6fa85ae 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -201,6 +201,15 @@ enum {
// Pack vector operands 0 and 1 into a single vector with half-sized elements.
PACK,
+ // Unpack the first half of vector operand 0 into double-sized elements.
+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
+ UNPACK_HIGH,
+ UNPACKL_HIGH,
+
+ // Likewise for the second half.
+ UNPACK_LOW,
+ UNPACKL_LOW,
+
// Shift each element of vector operand 0 by the number of bits specified
// by scalar operand 1.
VSHL_BY_SCALAR,
@@ -306,6 +315,23 @@ public:
// want to clobber the upper 32 bits of a GPR unnecessarily.
return MVT::i32;
}
+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+ const override {
+ // Widen subvectors to the full width rather than promoting integer
+ // elements. This is better because:
+ //
+ // (a) it means that we can handle the ABI for passing and returning
+ // sub-128 vectors without having to handle them as legal types.
+ //
+ // (b) we don't have instructions to extend on load and truncate on store,
+ // so promoting the integers is less efficient.
+ //
+ // (c) there are no multiplication instructions for the widest integer
+ // type (v2i64).
+ if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
+ return TypeWidenVector;
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
@@ -417,6 +443,8 @@ private:
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
+ unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 8abaeb69a20..f95714d1e70 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -290,24 +290,24 @@ let Predicates = [FeatureVector] in {
def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
// Unpack high.
- def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, null_frag, v128h, v128b, 0>;
- def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, null_frag, v128f, v128h, 1>;
- def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, null_frag, v128g, v128f, 2>;
+ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
+ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
+ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
// Unpack logical high.
- def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, null_frag, v128h, v128b, 0>;
- def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, null_frag, v128f, v128h, 1>;
- def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, null_frag, v128g, v128f, 2>;
+ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
+ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
+ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
// Unpack low.
- def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, null_frag, v128h, v128b, 0>;
- def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, null_frag, v128f, v128h, 1>;
- def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, null_frag, v128g, v128f, 2>;
+ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
+ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
+ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
// Unpack logical low.
- def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, null_frag, v128h, v128b, 0>;
- def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, null_frag, v128f, v128h, 1>;
- def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, null_frag, v128g, v128f, 2>;
+ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
+ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
+ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 63c217413ac..9bf288aa68e 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -193,6 +193,10 @@ def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS",
SDT_ZVecTernaryInt>;
def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
@@ -544,11 +548,12 @@ def z_vllezi64 : PatFrag<(ops node:$addr),
def z_vllezf32 : PatFrag<(ops node:$addr),
(bitconvert
(z_merge_high
- (v2i64 (bitconvert
- (z_merge_high
- (v4f32 (z_vzero)),
- (v4f32 (scalar_to_vector
- (f32 (load node:$addr))))))),
+ (v2i64
+ (z_unpackl_high
+ (v4i32
+ (bitconvert
+ (v4f32 (scalar_to_vector
+ (f32 (load node:$addr)))))))),
(v2i64 (z_vzero))))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
OpenPOWER on IntegriCloud