summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h9
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h6
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp42
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp22
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp1
-rw-r--r--llvm/test/CodeGen/X86/widen_conversions.ll18
9 files changed, 106 insertions, 1 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 80fb8b2d3a5..fb138f629aa 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -379,6 +379,15 @@ namespace ISD {
/// operand, a ValueType node.
SIGN_EXTEND_INREG,
+ /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an
+ /// in-register zero-extension of the low lanes of an integer vector. The
+ /// result type must have fewer elements than the operand type, and those
+ /// elements must be larger integer types such that the total size of the
+ /// operand type and the result type match. Each of the low operand
+ /// elements is zero-extended into the corresponding, wider result
+ /// elements.
+ ZERO_EXTEND_VECTOR_INREG,
+
/// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
/// integer.
FP_TO_SINT,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index db2e841e172..c2ae553c778 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -562,6 +562,12 @@ public:
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy);
+ /// getZeroExtendVectorInReg - Return an operation which will zero extend the
+ /// low lanes of the operand into the specified vector type. For example,
+ /// this can convert a v16i8 into a v4i32 by zero extending the low four
+ /// lanes of the operand from i8 to i32.
+ SDValue getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT);
+
/// getBoolExtOrTrunc - Convert Op, which must be of integer type, to the
/// integer type VT, by using an extension appropriate for the target's
/// BooleanContent or truncating it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 6ef2d7f0bc9..a6bbc218dd2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -649,6 +649,7 @@ private:
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_ZERO_EXTEND(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2c776746982..3fa64843a93 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -75,6 +75,12 @@ class VectorLegalizer {
/// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
+ /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
/// \brief Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
@@ -274,6 +280,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -614,6 +621,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
switch (Op->getOpcode()) {
case ISD::SIGN_EXTEND_INREG:
return ExpandSEXTINREG(Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ExpandZERO_EXTEND_VECTOR_INREG(Op);
case ISD::BSWAP:
return ExpandBSWAP(Op);
case ISD::VSELECT:
@@ -708,6 +717,39 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build up a zero vector to blend into this one.
+ EVT SrcScalarVT = SrcVT.getScalarType();
+ SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT);
+ SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.reserve(NumSrcElements);
+ for (int i = 0; i < NumSrcElements; ++i)
+ ShuffleMask.push_back(i);
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
EVT VT = Op.getValueType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 74f8f72c7ab..dc61577d6a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2380,6 +2380,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::ZERO_EXTEND: Res = WidenVecOp_ZERO_EXTEND(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
@@ -2388,7 +2389,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Res = WidenVecOp_Convert(N);
break;
@@ -2410,6 +2410,26 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
+SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue InOp = N->getOperand(0);
+ // If some legalization strategy other than widening is used on the operand,
+ // we can't safely assume that just zero-extending the low lanes is the
+ // correct transformation.
+ if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+ return WidenVecOp_Convert(N);
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ assert(NumElts < InVT.getVectorNumElements() && "Input wasn't widened!");
+
+ // Use a special DAG node to represent the operation of zero extending the
+ // low lanes.
+ return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c2cb0d8d779..9b65d90383b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1032,6 +1032,13 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
getConstant(Imm, Op.getValueType()));
}
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c92fb2453c2..63746e1cdc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -221,6 +221,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND: return "zero_extend";
case ISD::ANY_EXTEND: return "any_extend";
case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 188a0befcbd..70d26ee1561 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -869,6 +869,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
diff --git a/llvm/test/CodeGen/X86/widen_conversions.ll b/llvm/test/CodeGen/X86/widen_conversions.ll
new file mode 100644
index 00000000000..522ab475c2a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/widen_conversions.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) {
+; CHECK-LABEL: zext_v4i8_to_v4i32:
+;
+; CHECK: movd (%{{.*}}), %[[X:xmm[0-9]+]]
+; CHECK-NEXT: pxor %[[Z:xmm[0-9]+]], %[[Z]]
+; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
+; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
+; CHECK-NEXT: ret
+
+ %val = load <4 x i8>* %ptr
+ %ext = zext <4 x i8> %val to <4 x i32>
+ ret <4 x i32> %ext
+}
OpenPOWER on IntegriCloud