summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll')
-rw-r--r-- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll 28
1 files changed, 16 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index d701e0f1b57..5900dd9ac66 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -3,8 +3,8 @@
; CHECK-LABEL: vpsel_mul_reduce_add
; CHECK: dls lr, lr
; CHECK: [[LOOP:.LBB[0-9_]+]]:
-; CHECK: sub{{.*}} [[ELEMS:r[0-9]+]], #4
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS:r[0-9]+]]
+; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]]
; CHECK: vstr p0, [sp
; CHECK: vpstt
; CHECK-NEXT: vldrwt.u32
@@ -14,8 +14,9 @@
; CHECK: vldr p0, [sp
; CHECK: vpst
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r0]
+; CHECK: sub{{.*}} [[ELEMS]], [[ELEMS_OUT]], #4
; CHECK: le lr, [[LOOP]]
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS_OUT]]
; CHECK-NEXT: vpsel
; CHECK-NEXT: vaddv.u32
define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32 %N) {
@@ -71,8 +72,8 @@ for.cond.cleanup: ; preds = %middle.block, %entr
; CHECK-LABEL: vpsel_mul_reduce_add_2
; CHECK: dls lr, lr
; CHECK: [[LOOP:.LBB[0-9_]+]]:
-; CHECK: sub{{.*}} [[ELEMS:r[0-9]+]], #4
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS:r[0-9]+]]
+; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]]
; CHECK: vstr p0, [sp
; CHECK: vpstt
; CHECK-NEXT: vldrwt.u32
@@ -85,8 +86,9 @@ for.cond.cleanup: ; preds = %middle.block, %entr
; CHECK: vldr p0, [sp
; CHECK: vpst
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r0]
+; CHECK: sub{{.*}} [[ELEMS]], [[ELEMS_OUT]], #4
; CHECK: le lr, [[LOOP]]
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS_OUT]]
; CHECK-NEXT: vpsel
; CHECK-NEXT: vaddv.u32
define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b,
@@ -147,17 +149,18 @@ for.cond.cleanup: ; preds = %middle.block, %entr
; CHECK-LABEL: and_mul_reduce_add
; CHECK: dls lr, lr
; CHECK: [[LOOP:.LBB[0-9_]+]]:
-; CHECK: sub{{.*}} [[ELEMS:r[0-9]+]],{{.*}}#4
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS:r[0-9]+]]
; CHECK: vpstt
; CHECK-NEXT: vldrwt.u32
; CHECK-NEXT: vldrwt.u32
+; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]]
; CHECK: vpsttt
; CHECK-NEXT: vcmpt.i32 eq, {{.*}}, zr
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r3]
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r2]
+; CHECK: sub{{.*}} [[ELEMS]],{{.*}}#4
; CHECK: le lr, [[LOOP]]
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS_OUT]]
; CHECK: vpsel
define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b,
i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) {
@@ -215,9 +218,9 @@ for.cond.cleanup: ; preds = %middle.block, %entr
; CHECK-LABEL: or_mul_reduce_add
; CHECK: dls lr, lr
; CHECK: [[LOOP:.LBB[0-9_]+]]:
-; CHECK: sub{{.*}} [[ELEMS:r[0-9]+]],{{.*}}#4
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS:r[0-9]+]]
; CHECK: vstr p0, [sp
+; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]]
; CHECK: vpstt
; CHECK-NEXT: vldrwt.u32
; CHECK-NEXT: vldrwt.u32
@@ -226,12 +229,13 @@ for.cond.cleanup: ; preds = %middle.block, %entr
; CHECK: vldr p0, [sp
; CHECK: vmrs [[VCTP:r[0-9]+]], p0
; CHECK: orr{{.*}} [[VCMP]], [[VCTP]]
+; CHECK: sub{{.*}} [[ELEMS:r[0-9]+]], [[ELEMS_OUT]], #4
; CHECK-NEXT: vmsr p0
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r3]
; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r2]
; CHECK: le lr, [[LOOP]]
-; CHECK: vctp.32 [[ELEMS]]
+; CHECK: vctp.32 [[ELEMS_OUT]]
; CHECK: vpsel
define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b,
i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) {
OpenPOWER on IntegriCloud