diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_cast2.ll | 9 |
3 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4af854ec0c9..c50625e2080 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2425,6 +2425,39 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { InOp.getValueType().getVectorNumElements() && "Input wasn't widened!"); + // We may need to further widen the operand until it has the same total + // vector size as the result. + EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + // Use a special DAG node to represent the operation of zero extending the // low lanes. return DAG.getZeroExtendVectorInReg(InOp, DL, VT); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9b65d90383b..119b0255e7c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1034,6 +1034,9 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && "The destination vector type must have fewer lanes than the input."); return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); diff --git a/llvm/test/CodeGen/X86/vec_cast2.ll b/llvm/test/CodeGen/X86/vec_cast2.ll index 5f6e7a853a3..c7f1554cdd4 100644 --- a/llvm/test/CodeGen/X86/vec_cast2.ll +++ b/llvm/test/CodeGen/X86/vec_cast2.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE ;CHECK-LABEL: foo1_8: ;CHECK: vcvtdq2ps @@ -19,6 +20,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) { ;CHECK-LABEL: foo2_8: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_8: +;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}} +;CHECK-WIDE: ret define <8 x float> @foo2_8(<8 x i8> %src) { %res = uitofp <8 x i8> %src to <8 x float> ret <8 x float> %res @@ -27,6 +32,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) { ;CHECK-LABEL: foo2_4: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_4: +;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}} +;CHECK-WIDE: ret define <4 x float> @foo2_4(<4 x i8> %src) { %res = uitofp <4 x i8> %src to <4 x float> ret <4 x float> %res |

