diff options
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/vpadd.ll | 11 | 
2 files changed, 16 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0f43b324367..737007aa4cd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -8254,7 +8254,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,    // Get widened type and narrowed type.    MVT widenType;    unsigned numElem = VT.getVectorNumElements(); -  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { +   +  EVT inputLaneType = Vec.getValueType().getVectorElementType(); +  switch (inputLaneType.getSimpleVT().SimpleTy) {      case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;      case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;      case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; @@ -8264,7 +8266,8 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,    SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),                              widenType, &Ops[0], Ops.size()); -  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); +  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; +  return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);  }  static SDValue findMUL_LOHI(SDValue V) { diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll index f84721f996c..ecaabd3b9f7 100644 --- a/llvm/test/CodeGen/ARM/vpadd.ll +++ b/llvm/test/CodeGen/ARM/vpadd.ll @@ -152,6 +152,17 @@ define void @addCombineToVPADDL() nounwind ssp {    ret void  } +; Legalization produces an EXTRACT_VECTOR_ELT DAG node which performs an extend from +; i16 to i32. In this case the input for the formed VPADDL needs to be a vector of i16s. 
+define <2 x i16> @fromExtendingExtractVectorElt(<4 x i16> %in) { +;CHECK-LABEL: fromExtendingExtractVectorElt: +;CHECK: vpaddl.s16 +  %tmp1 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 0, i32 2> +  %tmp2 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 1, i32 3> +  %x = add <2 x i16> %tmp2, %tmp1 +  ret <2 x i16> %x +} +  declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone  declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone  declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone  | 

