summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp7
-rw-r--r--llvm/test/CodeGen/ARM/vpadd.ll11
2 files changed, 16 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 0f43b324367..737007aa4cd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -8254,7 +8254,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
- switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+
+ EVT inputLaneType = Vec.getValueType().getVectorElementType();
+ switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
@@ -8264,7 +8266,8 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
widenType, &Ops[0], Ops.size());
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
+ unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll
index f84721f996c..ecaabd3b9f7 100644
--- a/llvm/test/CodeGen/ARM/vpadd.ll
+++ b/llvm/test/CodeGen/ARM/vpadd.ll
@@ -152,6 +152,17 @@ define void @addCombineToVPADDL() nounwind ssp {
ret void
}
+; Legalization produces a EXTRACT_VECTOR_ELT DAG node which performs an extend from
+; i16 to i32. In this case the input for the formed VPADDL needs to be a vector of i16s.
+define <2 x i16> @fromExtendingExtractVectorElt(<4 x i16> %in) {
+;CHECK-LABEL: fromExtendingExtractVectorElt:
+;CHECK: vpaddl.s16
+ %tmp1 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 0, i32 2>
+ %tmp2 = shufflevector <4 x i16> %in, <4 x i16> undef, <2 x i32> <i32 1, i32 3>
+ %x = add <2 x i16> %tmp2, %tmp1
+ ret <2 x i16> %x
+}
+
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
OpenPOWER on IntegriCloud