diff options
| author | Florian Hahn <flo@fhahn.com> | 2019-08-05 11:12:23 +0000 | 
|---|---|---|
| committer | Florian Hahn <flo@fhahn.com> | 2019-08-05 11:12:23 +0000 | 
| commit | e3ea97b04962334e15047b26fbbbc04c90c78946 (patch) | |
| tree | 6334e617a537d6df56c5187ca61f2f980a6f6fce /llvm | |
| parent | 08f81513e94b70e0f9c955eeee5c56dc37637feb (diff) | |
| download | bcm5719-llvm-e3ea97b04962334e15047b26fbbbc04c90c78946.tar.gz bcm5719-llvm-e3ea97b04962334e15047b26fbbbc04c90c78946.zip  | |
[AArch64] Skip isZIPMask check for masks with an odd number of elements.
We process 2 elements at a time and expect the number of elements to be
even. Similar to D60690.
Reviewers: dmgreen, samparker, t.p.northover
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D65400
llvm-svn: 367831
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll | 26 | 
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6a7fdd4f662..d8c12eb9a05 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6312,6 +6312,8 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
   WhichResult = (M[0] == 0 ? 0 : 1);
   unsigned Idx = WhichResult * NumElts / 2;
   for (unsigned i = 0; i != NumElts; i += 2) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
index 7ed0e59e23c..2be8b014ebb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll
@@ -31,3 +31,29 @@ define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
   %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32> <i32 1, i32 4, i32 3, i32 0>
   ret <4 x i32> %s3
 }
+
+define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
+; CHECK-LABEL: zip_mask_check:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    str s0, [x2]
+; CHECK-NEXT:    ret
+  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
+  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
+  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32> <i32 1, i32 4, i32 undef, i32 undef>
+  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
+  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
+  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
+  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
+  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
+  store i32 %tmp12, i32* %p3, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
```

