summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Florian Hahn <flo@fhahn.com> 2019-05-21 10:05:26 +0000
committer: Florian Hahn <flo@fhahn.com> 2019-05-21 10:05:26 +0000
commit: 4a8835c655e83326f121a1340bd5ea372177d60b (patch)
tree: 1673fe4bc09525ce64957ac2f6cd0c4e334c084a
parent: 2eebf4d939b65650ba14ec062fe4be750fa347e1 (diff)
download: bcm5719-llvm-4a8835c655e83326f121a1340bd5ea372177d60b.tar.gz
download: bcm5719-llvm-4a8835c655e83326f121a1340bd5ea372177d60b.zip
[AArch64] Skip mask checks for masks with an odd number of elements.
Some checks in isShuffleMaskLegal (e.g. isTRN_v_undef_Mask or isUZP_v_undef_Mask) expect an even number of elements; otherwise they access invalid elements and crash. This patch adds checks to the impacted functions.

Fixes PR41951

Reviewers: t.p.northover, dmgreen, samparker

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D60690

llvm-svn: 361235
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 6
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll 33
2 files changed, 39 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 43620f150b6..4b027e93633 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6292,6 +6292,8 @@ static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
@@ -6306,6 +6308,8 @@ static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
@@ -6342,6 +6346,8 @@ static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
new file mode 100644
index 00000000000..7ed0e59e23c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - | FileCheck %s
+
+define void @test(i32* %p1, i32* %p2) {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #3
+; CHECK-NEXT: mov w9, #1
+; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: str w9, [x1]
+; CHECK-NEXT: ret
+ %tmp = shufflevector <1 x i32> <i32 1>, <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+ %tmp2 = shufflevector <3 x i32> <i32 2, i32 3, i32 4>, <3 x i32> %tmp, <3 x i32> <i32 0, i32 1, i32 3>
+ %tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+ %tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %tmp6 = extractelement <9 x i32> %tmp4, i32 7
+ %tmp8 = extractelement <9 x i32> %tmp4, i32 8
+ store i32 %tmp6, i32* %p1, align 4
+ store i32 %tmp8, i32* %p2, align 4
+ ret void
+}
+
+; Test case from PR41951
+define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: widen_shuffles_reduced:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8
+; CHECK-NEXT: ret
+ %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
+ ret <4 x i32> %s3
+}
OpenPOWER on IntegriCloud