diff options
| author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-05-21 21:41:02 +0000 |
|---|---|---|
| committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-05-21 21:41:02 +0000 |
| commit | 7772de25d07c977e41f8faa3bbf327033cd81c20 (patch) | |
| tree | 7b728f8308187d0334a44d96cff110090adba71e /llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll | |
| parent | fd79bc3aa25fa19eafa6871c7a0a40dd680b4775 (diff) | |
| download | bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.tar.gz bcm5719-llvm-7772de25d07c977e41f8faa3bbf327033cd81c20.zip | |
[DAGCombine][X86][AArch64] Masked merge unfolding: vector edition.
Summary:
This **appears** to be the last missing piece for the masked merge pattern handling in the backend.
This is [PR37104](https://bugs.llvm.org/show_bug.cgi?id=37104).
[PR6773](https://bugs.llvm.org/show_bug.cgi?id=6773) will introduce an IR canonicalization that is likely bad for the end assembly.
Previously, `andps`+`andnps` / `bsl` would be generated (see `@out`).
After that canonicalization, they would no longer be generated (see `@in`), so we need to make sure that they still are.
Differential Revision: https://reviews.llvm.org/D46528
llvm-svn: 332904
Diffstat (limited to 'llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll | 70 |
1 file changed, 28 insertions, 42 deletions
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll index c3199cc3bee..df86540fdd9 100644 --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -270,9 +270,8 @@ define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwin define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { ; CHECK-LABEL: in_v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <1 x i8> %x, %y %n1 = and <1 x i8> %n0, %mask @@ -287,9 +286,8 @@ define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { ; CHECK-LABEL: in_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <2 x i8> %x, %y %n1 = and <2 x i8> %n0, %mask @@ -300,9 +298,8 @@ define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { ; CHECK-LABEL: in_v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <1 x i16> %x, %y %n1 = and <1 x i16> %n0, %mask @@ -317,9 +314,8 @@ define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-LABEL: 
in_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <4 x i8> %x, %y %n1 = and <4 x i8> %n0, %mask @@ -330,9 +326,8 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { ; CHECK-LABEL: in_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <2 x i16> %x, %y %n1 = and <2 x i16> %n0, %mask @@ -343,9 +338,8 @@ define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { ; CHECK-LABEL: in_v1i32: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <1 x i32> %x, %y %n1 = and <1 x i32> %n0, %mask @@ -360,9 +354,8 @@ define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-LABEL: in_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <8 x i8> %x, %y %n1 = and <8 x i8> %n0, %mask @@ -373,9 +366,8 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { ; CHECK-LABEL: in_v4i16: ; CHECK: // %bb.0: -; 
CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <4 x i16> %x, %y %n1 = and <4 x i16> %n0, %mask @@ -386,9 +378,8 @@ define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { ; CHECK-LABEL: in_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <2 x i32> %x, %y %n1 = and <2 x i32> %n0, %mask @@ -399,9 +390,8 @@ define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { ; CHECK-LABEL: in_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: bsl v2.8b, v0.8b, v1.8b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <1 x i64> %x, %y %n1 = and <1 x i64> %n0, %mask @@ -416,9 +406,8 @@ define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { ; CHECK-LABEL: in_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <16 x i8> %x, %y %n1 = and <16 x i8> %n0, %mask @@ -429,9 +418,8 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { ; CHECK-LABEL: in_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: 
eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <8 x i16> %x, %y %n1 = and <8 x i16> %n0, %mask @@ -442,9 +430,8 @@ define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind { ; CHECK-LABEL: in_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <4 x i32> %x, %y %n1 = and <4 x i32> %n0, %mask @@ -455,9 +442,8 @@ define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { ; CHECK-LABEL: in_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %n0 = xor <2 x i64> %x, %y %n1 = and <2 x i64> %n0, %mask |

