Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 66
1 file changed, 39 insertions, 27 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec10968874e..e45ca4dbc0d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
 // shuffle in combination with VEXTs.
 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
@@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
   SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
   int VEXTOffsets[2] = { 0, 0 };
+  int OffsetMultipliers[2] = { 1, 1 };
 
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
-    if (SourceVecs[i].getValueType() == VT) {
+    unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+    SDValue CurSource = SourceVecs[i];
+    if (SourceVecs[i].getValueType().getVectorElementType() !=
+        VT.getVectorElementType()) {
+      // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+      // Then bitcast it to the vector which holds asserted element type,
+      // and record the multiplier of element width between SourceVecs and
+      // Build_vector which is needed to extract the correct lanes later.
+      EVT CastVT =
+          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                           SourceVecs[i].getValueSizeInBits() /
+                               VT.getVectorElementType().getSizeInBits());
+
+      CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+      OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+      NumSrcElts *= OffsetMultipliers[i];
+      MaxElts[i] *= OffsetMultipliers[i];
+      MinElts[i] *= OffsetMultipliers[i];
+    }
+
+    if (CurSource.getValueType() == VT) {
       // No VEXT necessary
-      ShuffleSrcs[i] = SourceVecs[i];
+      ShuffleSrcs[i] = CurSource;
       VEXTOffsets[i] = 0;
       continue;
-    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+    } else if (NumSrcElts < NumElts) {
       // We can pad out the smaller vector for free, so if it's part of a
      // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
-                                   DAG.getUNDEF(SourceVecs[i].getValueType()));
+      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+                                   DAG.getUNDEF(CurSource.getValueType()));
       continue;
     }
 
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
     // Since only 64-bit and 128-bit vectors are legal on ARM and
     // we've eliminated the other cases...
-    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+    assert(NumSrcElts == 2 * NumElts &&
            "unexpected vector sizes in ReconstructShuffle");
 
     if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (MinElts[i] >= NumElts) {
       // The extraction can just take the second half
       VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(NumElts));
     } else if (MaxElts[i] < NumElts) {
       // The extraction can just take the first half
       VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                   SourceVecs[i], DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(0));
     } else {
       // An actual VEXT is needed
       VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                     SourceVecs[i], DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(NumElts));
       unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
       ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
                                    DAG.getConstant(Imm, MVT::i32));
@@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     int ExtractElt =
         cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
     if (ExtractVec == SourceVecs[0]) {
-      Mask.push_back(ExtractElt - VEXTOffsets[0]);
+      Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
     } else {
-      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+      Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+                     VEXTOffsets[1]);
     }
   }
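
For context, the index arithmetic the patch introduces can be illustrated with a small standalone C++ sketch. This is not part of the patch and not LLVM code; the helper names and the v4i32 -> v8i16 scenario below are illustrative assumptions chosen to mirror OffsetMultipliers[i] and the Mask.push_back computation.

// Standalone sketch of the lane remapping the patch performs when a
// source vector's elements are wider than the BUILD_VECTOR's elements.
// Hypothetical helpers; not LLVM API.
#include <cassert>

// Element-width ratio between the source and the BUILD_VECTOR type,
// mirroring OffsetMultipliers[i] in the patch.
unsigned offsetMultiplier(unsigned SrcEltBits, unsigned DstEltBits) {
  assert(SrcEltBits % DstEltBits == 0 && "widths must divide evenly");
  return SrcEltBits / DstEltBits;
}

// Shuffle-mask index for a lane extracted from source 0, mirroring
// Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]).
int maskIndex(int ExtractElt, unsigned Multiplier, int VEXTOffset) {
  return ExtractElt * static_cast<int>(Multiplier) - VEXTOffset;
}

int main() {
  // A v4i32 source whose elements are asserted (AssertSext/AssertZext)
  // to fit in i16, feeding a v8i16 BUILD_VECTOR: bitcasting v4i32 to
  // v8i16 gives multiplier 32 / 16 = 2, so source lane 3 maps to lane 6
  // of the bitcast vector.
  unsigned Mult = offsetMultiplier(32, 16);
  assert(Mult == 2);
  assert(maskIndex(3, Mult, /*VEXTOffset=*/0) == 6);
  return 0;
}

On a little-endian target the asserted low bits of each wide element land in the lower-numbered lane of the bitcast view, which is why a plain multiply of the extract index is sufficient here.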