diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-01-11 15:13:47 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-01-11 15:13:47 +0000 |
commit | e63d8dda5a9f39e1e636e1882ebea79ca7c53d09 (patch) | |
tree | 965785dc1b2ccc90d95aa5ce52e4ce8ea1f02e35 | |
parent | 5388acd3de1122081bca90f76bc240b9e90af752 (diff) | |
download | bcm5719-llvm-e63d8dda5a9f39e1e636e1882ebea79ca7c53d09.tar.gz bcm5719-llvm-e63d8dda5a9f39e1e636e1882ebea79ca7c53d09.zip |
[ValueTracking] recognize min/max-of-min/max with notted ops (PR35875)
This was originally planned as the fix for:
https://bugs.llvm.org/show_bug.cgi?id=35834
...but simpler transforms handled that case, so I implemented a
lesser solution. It turns out we need to handle the case with 'not'
ops too because the real code example that we are trying to solve:
https://bugs.llvm.org/show_bug.cgi?id=35875
...has extra uses of the intermediate values, so we can't rely on
smaller canonicalizations to get us to the goal.
As with rL321672, I've tried to show every possibility in the
codegen tests because that's the simplest way to prove we're doing
the right thing in the wide variety of permutations of this pattern.
We can also show an InstCombine win because we added a fold for
this case in:
rL321998 / D41603
Here is an Alive proof for one variant of the pattern, showing that the
InstCombine and codegen results are correct:
https://rise4fun.com/Alive/vd1
Name: min3_nots
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%cmpxyz = icmp slt i8 %minxz, %ny
%r = select i1 %cmpxyz, i8 %minxz, i8 %ny
Name: min3_nots_alt
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
%nz = xor i8 %z, -1
%cmpxz = icmp slt i8 %nx, %nz
%minxz = select i1 %cmpxz, i8 %nx, i8 %nz
%cmpyz = icmp slt i8 %ny, %nz
%minyz = select i1 %cmpyz, i8 %ny, i8 %nz
%cmpyx = icmp slt i8 %y, %x
%r = select i1 %cmpyx, i8 %minxz, i8 %minyz
=>
%xz = icmp sgt i8 %x, %z
%maxxz = select i1 %xz, i8 %x, i8 %z
%xyz = icmp sgt i8 %maxxz, %y
%maxxyz = select i1 %xyz, i8 %maxxz, i8 %y
%r = xor i8 %maxxyz, -1
llvm-svn: 322283
-rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/minmax-of-minmax.ll | 706 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/max-of-nots.ll | 12 |
3 files changed, 357 insertions, 404 deletions
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b8f6066bf05..6a322438f5a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4179,7 +4179,9 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; - // Match the compare to the min/max operations of the select operands. + // We have something like: x Pred y ? min(a, b) : min(c, d). + // Try to match the compare to the min/max operations of the select operands. + // First, make sure we have the right compare predicate. switch (L.Flavor) { case SPF_SMIN: if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { @@ -4217,21 +4219,38 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, return {SPF_UNKNOWN, SPNB_NA, false}; } - // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) - if (CmpLHS == A && CmpRHS == C && D == B) - return {L.Flavor, SPNB_NA, false}; + // If there is a common operand in the already matched min/max and the other + // min/max operands match the compare operands (either directly or inverted), + // then this is min/max of the same flavor. + // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + if (D == B) { + if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) - if (CmpLHS == A && CmpRHS == D && C == B) - return {L.Flavor, SPNB_NA, false}; - + // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) + if (C == B) { + if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // b pred c ? 
m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) - if (CmpLHS == B && CmpRHS == C && D == A) - return {L.Flavor, SPNB_NA, false}; - + // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) + if (D == A) { + if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) - if (CmpLHS == B && CmpRHS == D && C == A) - return {L.Flavor, SPNB_NA, false}; + // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) + if (C == A) { + if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } return {SPF_UNKNOWN, SPNB_NA, false}; } diff --git a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll index 0bdffa6b16f..9257832d4c4 100644 --- a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -4,7 +4,7 @@ ; There are 4 commuted variants (abbc/abcb/bcab/bcba) * ; 4 predicate variants ([*][lg][te]) * ; 4 min/max flavors (smin/smax/umin/umax) * -; 2 notted variants +; 2 notted variants ; = 128 tests define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { @@ -1034,13 +1034,12 @@ define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; 
CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1057,13 +1056,12 @@ define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1080,13 +1078,12 @@ define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1103,13 +1100,12 @@ define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba: ; CHECK: // 
%bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1126,13 +1122,12 @@ define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1149,13 +1144,12 @@ define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn 
v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1172,13 +1166,12 @@ define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1195,13 +1188,12 @@ define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1218,13 +1210,12 @@ define <4 x i32> 
@notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1241,13 +1232,12 @@ define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1264,13 +1254,12 @@ define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: 
mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1287,13 +1276,12 @@ define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1310,13 +1298,12 @@ define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin 
v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1333,13 +1320,12 @@ define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1356,13 +1342,12 @@ define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1379,13 +1364,12 @@ define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x 
i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1402,13 +1386,12 @@ define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1425,13 +1408,12 @@ define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; 
CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1448,13 +1430,12 @@ define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1471,13 +1452,12 @@ define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1494,13 +1474,12 @@ define <4 x i32> 
@notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1517,13 +1496,12 @@ define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1540,13 +1518,12 @@ define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; 
CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1563,13 +1540,12 @@ define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1586,13 +1562,12 @@ define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: 
smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1609,13 +1584,12 @@ define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1632,13 +1606,12 @@ define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1655,13 +1628,12 @@ define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> 
%y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1678,13 +1650,12 @@ define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1701,13 +1672,12 @@ define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: 
cmge v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1724,13 +1694,12 @@ define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1747,13 +1716,12 @@ define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: smax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmge v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x 
i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1770,13 +1738,12 @@ define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1793,13 +1760,12 @@ define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1816,13 +1782,12 @@ define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: 
mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1839,13 +1804,12 @@ define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1862,13 +1826,12 @@ define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: 
umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1885,13 +1848,12 @@ define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1908,13 +1870,12 @@ define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1931,13 +1892,12 @@ define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, 
<4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1954,13 +1914,12 @@ define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -1977,13 +1936,12 @@ define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhs 
v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2000,13 +1958,12 @@ define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2023,13 +1980,12 @@ define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, 
i32 -1, i32 -1> @@ -2046,13 +2002,12 @@ define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2069,13 +2024,12 @@ define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umin v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2092,13 +2046,12 @@ define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, 
v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2115,13 +2068,12 @@ define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umin v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2138,13 +2090,12 @@ define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, 
v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2161,13 +2112,12 @@ define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2184,13 +2134,12 @@ define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2207,13 +2156,12 @@ define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umax_bc_ba(<4 x 
i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2230,13 +2178,12 @@ define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2253,13 +2200,12 @@ define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: 
cmhi v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2276,13 +2222,12 @@ define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2299,13 +2244,12 @@ define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, 
<i32 -1, i32 -1, i32 -1, i32 -1> @@ -2322,13 +2266,12 @@ define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2345,13 +2288,12 @@ define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2368,13 +2310,12 @@ define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; 
CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2391,13 +2332,12 @@ define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2414,13 +2354,12 @@ define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32 define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v4.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax 
v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2437,13 +2376,12 @@ define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v3.4s, v3.4s, v1.4s -; CHECK-NEXT: umax v1.4s, v4.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2460,13 +2398,12 @@ define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v3.4s, v1.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -2483,13 +2420,12 @@ define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x 
i32> %x, <4 x i32> %y, <4 define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v3.16b, v0.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v4.16b, v2.16b -; CHECK-NEXT: umax v4.4s, v1.4s, v4.4s -; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s -; CHECK-NEXT: bsl v0.16b, v4.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll index 016147600ed..9e46aba1d11 100644 --- a/llvm/test/Transforms/InstCombine/max-of-nots.ll +++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll @@ -109,13 +109,11 @@ define i8 @umin3_not_more_uses(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @umin3_not_more_uses( ; CHECK-NEXT: [[NX:%.*]] = xor i8 %x, -1 ; CHECK-NEXT: [[NY:%.*]] = xor i8 %y, -1 -; CHECK-NEXT: [[NZ:%.*]] = xor i8 %z, -1 -; CHECK-NEXT: [[CMPXZ:%.*]] = icmp ult i8 [[NX]], [[NZ]] -; CHECK-NEXT: [[MINXZ:%.*]] = select i1 [[CMPXZ]], i8 [[NX]], i8 [[NZ]] -; CHECK-NEXT: [[CMPYZ:%.*]] = icmp ult i8 [[NY]], [[NZ]] -; CHECK-NEXT: [[MINYZ:%.*]] = select i1 [[CMPYZ]], i8 [[NY]], i8 [[NZ]] -; CHECK-NEXT: [[CMPYX:%.*]] = icmp ult i8 %y, %x -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMPYX]], i8 [[MINXZ]], i8 [[MINYZ]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 %x, %z +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 %x, i8 %z +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], %y +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 %y +; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP4]], -1 ; CHECK-NEXT: call void @extra_use(i8 [[NX]]) ; CHECK-NEXT: call void 
@extra_use(i8 [[NY]]) ; CHECK-NEXT: ret i8 [[R]] |