summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2019-08-05 11:12:23 +0000
committerFlorian Hahn <flo@fhahn.com>2019-08-05 11:12:23 +0000
commite3ea97b04962334e15047b26fbbbc04c90c78946 (patch)
tree6334e617a537d6df56c5187ca61f2f980a6f6fce /llvm
parent08f81513e94b70e0f9c955eeee5c56dc37637feb (diff)
downloadbcm5719-llvm-e3ea97b04962334e15047b26fbbbc04c90c78946.tar.gz
bcm5719-llvm-e3ea97b04962334e15047b26fbbbc04c90c78946.zip
[AArch64] Skip isZIPMask check for masks with an odd number of elements.
We process 2 elements at a time and expect the number of elements to be even. Similar to D60690. Reviewers: dmgreen, samparker, t.p.northover Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D65400 llvm-svn: 367831
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp2
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll26
2 files changed, 28 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6a7fdd4f662..d8c12eb9a05 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
index 7ed0e59e23c..2be8b014ebb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
%s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
ret <4 x i32> %s3
}
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+; CHECK-LABEL: zip_mask_check:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: str s0, [x2]
+; CHECK-NEXT: ret
+ %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+ %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+ %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+ %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+ %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+ %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+ %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+ %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+ store i32 %tmp12, i32* %p3, align 4
+ ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
OpenPOWER on IntegriCloud