summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp 20
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll 290
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll 46
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll 64
4 files changed, 221 insertions, 199 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6f7b6214e95..44c43edfcd0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12629,6 +12629,24 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return Vec;
}
+/// PerformPREDICATE_CASTCombine - Target-specific dag combine xforms for
+/// ARMISD::PREDICATE_CAST.
+///
+/// Folds a cast whose operand is itself a PREDICATE_CAST: the pair collapses
+/// to the inner operand when that already has the result type of \p N, and to
+/// a single cast of the inner operand otherwise. Returns an empty SDValue when
+/// the operand of \p N is not a PREDICATE_CAST.
+static SDValue
+PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  SDLoc dl(N);
+
+  // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
+  if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
+    SDValue Src = Op->getOperand(0);
+    // If the valuetypes are the same, we can remove the cast entirely.
+    if (Src.getValueType() == VT)
+      return Src;
+    // Otherwise cast x directly to the type N produces. The replacement has to
+    // keep N's result type, so the new node is built with VT rather than with
+    // the type of x.
+    return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Src);
+  }
+
+  return SDValue();
+}
+
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
@@ -14169,6 +14187,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::PREDICATE_CAST:
+ return PerformPREDICATE_CASTCombine(N, DCI);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index 0bb5a7c0958..4302b9a8e2a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -21,26 +21,27 @@ entry:
define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
; CHECK-LABEL: foo_sext_v4i32_v4i8:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r1, lr, #1
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r12, #0, #1
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, lr, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: ubfx r3, lr, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #3, #1
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
@@ -62,7 +63,7 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r0]
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <4 x i32>, <4 x i32>* %mask, align 4
%1 = icmp sgt <4 x i32> %0, zeroinitializer
@@ -75,26 +76,27 @@ entry:
define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
; CHECK-LABEL: foo_sext_v4i32_v4i16:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r1, lr, #1
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r12, #0, #1
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, lr, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: ubfx r3, lr, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #3, #1
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
@@ -115,7 +117,7 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r0]
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <4 x i32>, <4 x i32>* %mask, align 4
%1 = icmp sgt <4 x i32> %0, zeroinitializer
@@ -128,27 +130,28 @@ entry:
define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) {
; CHECK-LABEL: foo_zext_v4i32_v4i8:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov.i32 q1, #0xff
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r1, lr, #1
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r12, #0, #1
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, lr, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: ubfx r3, lr, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #3, #1
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrbne r3, [r2]
@@ -169,7 +172,7 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r0]
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <4 x i32>, <4 x i32>* %mask, align 4
%1 = icmp sgt <4 x i32> %0, zeroinitializer
@@ -182,26 +185,27 @@ entry:
define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) {
; CHECK-LABEL: foo_zext_v4i32_v4i16:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r1, lr, #1
+; CHECK-NEXT: ubfx r3, lr, #4, #1
+; CHECK-NEXT: rsb.w r12, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r12, #0, #1
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, lr, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
+; CHECK-NEXT: ubfx r3, lr, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #3, #1
; CHECK-NEXT: lsls r3, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: ldrhne r3, [r2]
@@ -222,7 +226,7 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16>
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r0]
; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <4 x i32>, <4 x i32>* %mask, align 4
%1 = icmp sgt <4 x i32> %0, zeroinitializer
@@ -251,35 +255,37 @@ entry:
define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
; CHECK-LABEL: foo_sext_v8i16_v8i8:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r3, lr, #1
+; CHECK-NEXT: ubfx r1, lr, #2, #1
+; CHECK-NEXT: rsb.w r12, r3, #0
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r12, #0, #1
; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: ubfx r1, lr, #4, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: ubfx r1, lr, #6, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: ubfx r1, lr, #8, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #4, #1
-; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: ubfx r1, lr, #10, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #5, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: ubfx r1, lr, #12, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #6, #1
-; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: ubfx r1, lr, #14, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #7, #1
; CHECK-NEXT: uxtb r1, r3
@@ -319,7 +325,7 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q0, [r0]
; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <8 x i16>, <8 x i16>* %mask, align 2
%1 = icmp sgt <8 x i16> %0, zeroinitializer
@@ -332,35 +338,37 @@ entry:
define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) {
; CHECK-LABEL: foo_zext_v8i16_v8i8:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: @ implicit-def: $q0
-; CHECK-NEXT: vmrs r12, p0
-; CHECK-NEXT: and r1, r12, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #0, #1
-; CHECK-NEXT: ubfx r1, r12, #2, #1
+; CHECK-NEXT: vmrs lr, p0
+; CHECK-NEXT: and r3, lr, #1
+; CHECK-NEXT: ubfx r1, lr, #2, #1
+; CHECK-NEXT: rsb.w r12, r3, #0
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: bfi r3, r12, #0, #1
; CHECK-NEXT: bfi r3, r1, #1, #1
-; CHECK-NEXT: ubfx r1, r12, #4, #1
+; CHECK-NEXT: ubfx r1, lr, #4, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #2, #1
-; CHECK-NEXT: ubfx r1, r12, #6, #1
+; CHECK-NEXT: ubfx r1, lr, #6, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: ubfx r1, r12, #8, #1
+; CHECK-NEXT: ubfx r1, lr, #8, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #4, #1
-; CHECK-NEXT: ubfx r1, r12, #10, #1
+; CHECK-NEXT: ubfx r1, lr, #10, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #5, #1
-; CHECK-NEXT: ubfx r1, r12, #12, #1
+; CHECK-NEXT: ubfx r1, lr, #12, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #6, #1
-; CHECK-NEXT: ubfx r1, r12, #14, #1
+; CHECK-NEXT: ubfx r1, lr, #14, #1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: bfi r3, r1, #7, #1
; CHECK-NEXT: uxtb r1, r3
@@ -400,7 +408,7 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q0, [r0]
; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: pop {r7, pc}
entry:
%0 = load <8 x i16>, <8 x i16>* %mask, align 2
%1 = icmp sgt <8 x i16> %0, zeroinitializer
@@ -432,36 +440,36 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vldrh.u16 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vpt.s16 gt, q0, zr
; CHECK-NEXT: vldrht.u16 q0, [r2]
; CHECK-NEXT: vmrs r1, p0
; CHECK-NEXT: and r2, r1, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #0, #1
-; CHECK-NEXT: ubfx r2, r1, #2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #1, #1
-; CHECK-NEXT: ubfx r2, r1, #4, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #2, #1
-; CHECK-NEXT: ubfx r2, r1, #6, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #3, #1
-; CHECK-NEXT: ubfx r2, r1, #8, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #4, #1
-; CHECK-NEXT: ubfx r2, r1, #10, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #5, #1
-; CHECK-NEXT: ubfx r2, r1, #12, #1
+; CHECK-NEXT: rsbs r3, r2, #0
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: bfi r2, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r1, #2, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r1, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #2, #1
+; CHECK-NEXT: ubfx r3, r1, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #3, #1
+; CHECK-NEXT: ubfx r3, r1, #8, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #4, #1
+; CHECK-NEXT: ubfx r3, r1, #10, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #5, #1
+; CHECK-NEXT: ubfx r3, r1, #12, #1
; CHECK-NEXT: ubfx r1, r1, #14, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #6, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r2, r3, #6, #1
; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #7, #1
-; CHECK-NEXT: lsls r2, r3, #31
-; CHECK-NEXT: uxtb r1, r3
+; CHECK-NEXT: bfi r2, r1, #7, #1
+; CHECK-NEXT: uxtb r1, r2
+; CHECK-NEXT: lsls r2, r2, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne.u16 r2, q0[0]
; CHECK-NEXT: strbne r2, [r0]
@@ -510,23 +518,22 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vpt.s32 gt, q0, zr
; CHECK-NEXT: vldrwt.u32 q0, [r2]
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: and r2, r1, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #0, #1
-; CHECK-NEXT: ubfx r2, r1, #4, #1
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: and r1, r2, #1
+; CHECK-NEXT: rsbs r3, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #1, #1
-; CHECK-NEXT: ubfx r2, r1, #8, #1
-; CHECK-NEXT: ubfx r1, r1, #12, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #2, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: bfi r1, r2, #3, #1
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
@@ -560,23 +567,22 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32>
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vldrw.u32 q0, [r1]
-; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vpt.s32 gt, q0, zr
; CHECK-NEXT: vldrwt.u32 q0, [r2]
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: and r2, r1, #1
+; CHECK-NEXT: vmrs r2, p0
+; CHECK-NEXT: and r1, r2, #1
+; CHECK-NEXT: rsbs r3, r1, #0
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: bfi r1, r3, #0, #1
+; CHECK-NEXT: ubfx r3, r2, #4, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #1, #1
+; CHECK-NEXT: ubfx r3, r2, #8, #1
+; CHECK-NEXT: ubfx r2, r2, #12, #1
+; CHECK-NEXT: rsbs r3, r3, #0
+; CHECK-NEXT: bfi r1, r3, #2, #1
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #0, #1
-; CHECK-NEXT: ubfx r2, r1, #4, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #1, #1
-; CHECK-NEXT: ubfx r2, r1, #8, #1
-; CHECK-NEXT: ubfx r1, r1, #12, #1
-; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: bfi r3, r2, #2, #1
-; CHECK-NEXT: rsbs r1, r1, #0
-; CHECK-NEXT: bfi r3, r1, #3, #1
-; CHECK-NEXT: and r1, r3, #15
+; CHECK-NEXT: bfi r1, r2, #3, #1
; CHECK-NEXT: lsls r2, r1, #31
; CHECK-NEXT: itt ne
; CHECK-NEXT: vmovne r2, s0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index f8f2e0b5613..61a78eb722e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -218,10 +218,10 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) {
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-LE-NEXT: movs r0, #0
; CHECK-LE-NEXT: vmrs r1, p0
-; CHECK-LE-NEXT: and r2, r1, #1
-; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: and r0, r1, #1
+; CHECK-LE-NEXT: rsbs r2, r0, #0
+; CHECK-LE-NEXT: movs r0, #0
; CHECK-LE-NEXT: bfi r0, r2, #0, #1
; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
@@ -232,7 +232,6 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) {
; CHECK-LE-NEXT: bfi r0, r2, #2, #1
; CHECK-LE-NEXT: rsbs r1, r1, #0
; CHECK-LE-NEXT: bfi r0, r1, #3, #1
-; CHECK-LE-NEXT: and r0, r0, #15
; CHECK-LE-NEXT: add sp, #4
; CHECK-LE-NEXT: bx lr
;
@@ -241,22 +240,21 @@ define arm_aapcs_vfpcc i4 @bitcast_from_v4i1(<4 x i32> %a) {
; CHECK-BE-NEXT: .pad #4
; CHECK-BE-NEXT: sub sp, #4
; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
-; CHECK-BE-NEXT: vmrs r0, p0
-; CHECK-BE-NEXT: and r2, r0, #1
-; CHECK-BE-NEXT: ubfx r1, r0, #4, #1
+; CHECK-BE-NEXT: vmrs r1, p0
+; CHECK-BE-NEXT: and r0, r1, #1
+; CHECK-BE-NEXT: rsbs r2, r0, #0
+; CHECK-BE-NEXT: movs r0, #0
+; CHECK-BE-NEXT: bfi r0, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-BE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r0, r2, #2, #1
; CHECK-BE-NEXT: rsbs r1, r1, #0
-; CHECK-BE-NEXT: bfi r3, r2, #0, #1
-; CHECK-BE-NEXT: bfi r3, r1, #1, #1
-; CHECK-BE-NEXT: ubfx r1, r0, #8, #1
-; CHECK-BE-NEXT: ubfx r0, r0, #12, #1
-; CHECK-BE-NEXT: rsbs r1, r1, #0
-; CHECK-BE-NEXT: bfi r3, r1, #2, #1
-; CHECK-BE-NEXT: rsbs r0, r0, #0
-; CHECK-BE-NEXT: bfi r3, r0, #3, #1
-; CHECK-BE-NEXT: and r0, r3, #15
+; CHECK-BE-NEXT: bfi r0, r1, #3, #1
; CHECK-BE-NEXT: add sp, #4
; CHECK-BE-NEXT: bx lr
entry:
@@ -271,10 +269,10 @@ define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) {
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-LE-NEXT: movs r0, #0
; CHECK-LE-NEXT: vmrs r1, p0
-; CHECK-LE-NEXT: and r2, r1, #1
-; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: and r0, r1, #1
+; CHECK-LE-NEXT: rsbs r2, r0, #0
+; CHECK-LE-NEXT: movs r0, #0
; CHECK-LE-NEXT: bfi r0, r2, #0, #1
; CHECK-LE-NEXT: ubfx r2, r1, #2, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
@@ -308,12 +306,12 @@ define arm_aapcs_vfpcc i8 @bitcast_from_v8i1(<8 x i16> %a) {
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
; CHECK-BE-NEXT: vmrs r1, p0
-; CHECK-BE-NEXT: ubfx r0, r1, #2, #1
-; CHECK-BE-NEXT: rsbs r2, r0, #0
; CHECK-BE-NEXT: and r0, r1, #1
-; CHECK-BE-NEXT: rsbs r3, r0, #0
+; CHECK-BE-NEXT: rsbs r2, r0, #0
; CHECK-BE-NEXT: movs r0, #0
-; CHECK-BE-NEXT: bfi r0, r3, #0, #1
+; CHECK-BE-NEXT: bfi r0, r2, #0, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #2, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
; CHECK-BE-NEXT: bfi r0, r2, #1, #1
; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
; CHECK-BE-NEXT: rsbs r2, r2, #0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
index d314d0525c4..21af261170e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
@@ -176,44 +176,42 @@ define arm_aapcs_vfpcc void @store_v4i1(<4 x i1> *%dst, <4 x i32> %a) {
; CHECK-LE-LABEL: store_v4i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-LE-NEXT: movs r1, #0
-; CHECK-LE-NEXT: vmrs r2, p0
-; CHECK-LE-NEXT: and r3, r2, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #2, #1
+; CHECK-LE-NEXT: movs r3, #0
+; CHECK-LE-NEXT: vmrs r1, p0
+; CHECK-LE-NEXT: and r2, r1, #1
; CHECK-LE-NEXT: rsbs r2, r2, #0
-; CHECK-LE-NEXT: bfi r1, r2, #3, #1
-; CHECK-LE-NEXT: and r1, r1, #15
-; CHECK-LE-NEXT: strb r1, [r0]
+; CHECK-LE-NEXT: bfi r3, r2, #0, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #1, #1
+; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
+; CHECK-LE-NEXT: ubfx r1, r1, #12, #1
+; CHECK-LE-NEXT: rsbs r2, r2, #0
+; CHECK-LE-NEXT: bfi r3, r2, #2, #1
+; CHECK-LE-NEXT: rsbs r1, r1, #0
+; CHECK-LE-NEXT: bfi r3, r1, #3, #1
+; CHECK-LE-NEXT: strb r3, [r0]
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: store_v4i1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: movs r3, #0
; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
; CHECK-BE-NEXT: vmrs r1, p0
-; CHECK-BE-NEXT: and r3, r1, #1
+; CHECK-BE-NEXT: and r2, r1, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #0, #1
; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: rsb.w r12, r2, #0
-; CHECK-BE-NEXT: movs r2, #0
-; CHECK-BE-NEXT: bfi r2, r3, #0, #1
-; CHECK-BE-NEXT: ubfx r3, r1, #8, #1
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #1, #1
+; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
; CHECK-BE-NEXT: ubfx r1, r1, #12, #1
-; CHECK-BE-NEXT: bfi r2, r12, #1, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: rsbs r2, r2, #0
+; CHECK-BE-NEXT: bfi r3, r2, #2, #1
; CHECK-BE-NEXT: rsbs r1, r1, #0
-; CHECK-BE-NEXT: bfi r2, r3, #2, #1
-; CHECK-BE-NEXT: bfi r2, r1, #3, #1
-; CHECK-BE-NEXT: and r1, r2, #15
-; CHECK-BE-NEXT: strb r1, [r0]
+; CHECK-BE-NEXT: bfi r3, r1, #3, #1
+; CHECK-BE-NEXT: strb r3, [r0]
; CHECK-BE-NEXT: bx lr
entry:
%c = icmp eq <4 x i32> %a, zeroinitializer
@@ -225,10 +223,10 @@ define arm_aapcs_vfpcc void @store_v8i1(<8 x i1> *%dst, <8 x i16> %a) {
; CHECK-LE-LABEL: store_v8i1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: vmrs r2, p0
-; CHECK-LE-NEXT: and r3, r2, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
+; CHECK-LE-NEXT: and r1, r2, #1
+; CHECK-LE-NEXT: rsbs r3, r1, #0
+; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: bfi r1, r3, #0, #1
; CHECK-LE-NEXT: ubfx r3, r2, #2, #1
; CHECK-LE-NEXT: rsbs r3, r3, #0
@@ -259,14 +257,14 @@ define arm_aapcs_vfpcc void @store_v8i1(<8 x i1> *%dst, <8 x i16> %a) {
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
; CHECK-BE-NEXT: vmrs r2, p0
-; CHECK-BE-NEXT: ubfx r1, r2, #2, #1
-; CHECK-BE-NEXT: rsb.w r12, r1, #0
; CHECK-BE-NEXT: and r1, r2, #1
; CHECK-BE-NEXT: rsbs r3, r1, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: bfi r1, r3, #0, #1
+; CHECK-BE-NEXT: ubfx r3, r2, #2, #1
+; CHECK-BE-NEXT: rsbs r3, r3, #0
+; CHECK-BE-NEXT: bfi r1, r3, #1, #1
; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-BE-NEXT: bfi r1, r12, #1, #1
; CHECK-BE-NEXT: rsbs r3, r3, #0
; CHECK-BE-NEXT: bfi r1, r3, #2, #1
; CHECK-BE-NEXT: ubfx r3, r2, #6, #1
OpenPOWER on IntegriCloud