[AArch64] Fix a silent codegen fault in BUILD_VECTOR lowering.

We should be talking about the number of source elements, not the number of destination elements, given we know at this point that the source and dest element numbers are not the same.

While we're at it, avoid writing to std::vector::end()...

Bug found with random testing and a lot of coffee.

llvm-svn: 220051
author: James Molloy <james.molloy@arm.com> 2014-10-17 17:06:31 +0000
committer: James Molloy <james.molloy@arm.com> 2014-10-17 17:06:31 +0000
commit: f497d5511d1aff061c3af95b13796f8a4b05a4d0 (patch)
tree: b75871ef972ad46173228992a510b7f2ac1782c5
parent: ff73fc9547e927a07468f7ee34aac022b522452e (diff)
download: bcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.tar.gz
bcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.zip
2 files changed, 31 insertions, 9 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4d51c4f21d2..ed0be2e9131 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4558,7 +4558,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     SDValue SourceVec = V.getOperand(0);
     auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
     if (Source == Sources.end())
-      Sources.push_back(ShuffleSourceInfo(SourceVec));
+      Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
 
     // Update the minimum and maximum lane number seen.
     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
@@ -4597,8 +4597,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     // This stage of the search produces a source with the same element type as
     // the original, but with a total width matching the BUILD_VECTOR output.
     EVT EltVT = SrcVT.getVectorElementType();
-    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                  VT.getSizeInBits() / EltVT.getSizeInBits());
+    unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
 
     if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
       assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
@@ -4612,18 +4612,18 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
 
     assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
 
-    if (Src.MaxElt - Src.MinElt >= NumElts) {
+    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
       // Span too large for a VEXT to cope
       return SDValue();
     }
 
-    if (Src.MinElt >= NumElts) {
+    if (Src.MinElt >= NumSrcElts) {
       // The extraction can just take the second half
       Src.ShuffleVec =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumElts));
-      Src.WindowBase = -NumElts;
-    } else if (Src.MaxElt < NumElts) {
+                      DAG.getIntPtrConstant(NumSrcElts));
+      Src.WindowBase = -NumSrcElts;
+    } else if (Src.MaxElt < NumSrcElts) {
       // The extraction can just take the first half
       Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
                                    Src.ShuffleVec, DAG.getIntPtrConstant(0));
@@ -4633,7 +4633,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                      Src.ShuffleVec, DAG.getIntPtrConstant(0));
       SDValue VEXTSrc2 =
           DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumElts));
+                      DAG.getIntPtrConstant(NumSrcElts));
       unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
 
       Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
new file mode 100644
index 00000000000..d06df7a87fd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+entry:
+  ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext
+  ; but only match the last three instructions as the first two could be combined to
+  ; a dup2 at some stage.
+  ; CHECK: dup
+  ; CHECK: ext
+  ; CHECK: ext
+  %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
+  %vgetq_lane = trunc i32 %x4 to i16
+  %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
+  %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
+  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
+  %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
+  %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
+  ret <4 x i16> %vset_lane267
+}
author	James Molloy <james.molloy@arm.com>	2014-10-17 17:06:31 +0000
committer	James Molloy <james.molloy@arm.com>	2014-10-17 17:06:31 +0000
commit	f497d5511d1aff061c3af95b13796f8a4b05a4d0 (patch)
tree	b75871ef972ad46173228992a510b7f2ac1782c5
parent	ff73fc9547e927a07468f7ee34aac022b522452e (diff)
download	bcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.tar.gz bcm5719-llvm-f497d5511d1aff061c3af95b13796f8a4b05a4d0.zip