ARM: align loops to 4 bytes on Cortex-M3 and Cortex-M4.

The Technical Reference Manuals for these two CPUs state that branching to an unaligned 32-bit instruction incurs an extra pipeline reload penalty. That's bad.

This also enables the optimization at -Os, since it costs on average one byte per loop in return for one cycle per iteration, which is pretty good going.

llvm-svn: 342127
author: Tim Northover <tnorthover@apple.com> 2018-09-13 10:28:05 +0000
committer: Tim Northover <tnorthover@apple.com> 2018-09-13 10:28:05 +0000
commit: c15d47bb013e975da582c8fd786ba8234d70d75d (patch)
tree: e13262451793600a29c0df26342fc954e1a8a79a /llvm/test
parent: 95ac65bc32180744cbc67d4e82a0f6417fb92aa9 (diff)
download: bcm5719-llvm-c15d47bb013e975da582c8fd786ba8234d70d75d.tar.gz
bcm5719-llvm-c15d47bb013e975da582c8fd786ba8234d70d75d.zip
1 files changed, 49 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll b/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll
new file mode 100644
index 00000000000..1b41c1b6c3f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m3 -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m4 -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-none-eabi %s -mcpu=cortex-m33 -o - | FileCheck %s
+
+define void @test_loop_alignment(i32* %in, i32*  %out) optsize {
+; CHECK-LABEL: test_loop_alignment:
+; CHECK: movs {{r[0-9]+}}, #0
+; CHECK: .p2align 2
+; optsize case: the patterns above expect `movs rN, #0` and then a `.p2align 2` (4-byte alignment) directive in the llc output.
+entry:
+  br label %loop
+; Loop body: out[i] = in[i] * 5, with i starting at 0 and incremented by 1 each iteration.
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %in.addr = getelementptr inbounds i32, i32* %in, i32 %i
+  %lhs = load i32, i32* %in.addr, align 4
+  %res = mul nsw i32 %lhs, 5
+  %out.addr = getelementptr inbounds i32, i32* %out, i32 %i
+  store i32 %res, i32* %out.addr, align 4
+  %i.next = add i32 %i, 1
+  %done = icmp eq i32 %i.next, 1024
+  br i1 %done, label %end, label %loop
+; Exit block, reached once %i.next equals 1024 (i.e. after 1024 iterations).
+end:
+  ret void
+}
+
+define void @test_loop_alignment_minsize(i32* %in, i32*  %out) minsize {
+; CHECK-LABEL: test_loop_alignment_minsize:
+; CHECK: movs {{r[0-9]+}}, #0
+; CHECK-NOT: .p2align
+; minsize case: same loop as @test_loop_alignment, but no `.p2align` directive may appear after the `movs rN, #0` match.
+entry:
+  br label %loop
+; Loop body: out[i] = in[i] * 5, with i starting at 0 and incremented by 1 each iteration.
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %in.addr = getelementptr inbounds i32, i32* %in, i32 %i
+  %lhs = load i32, i32* %in.addr, align 4
+  %res = mul nsw i32 %lhs, 5
+  %out.addr = getelementptr inbounds i32, i32* %out, i32 %i
+  store i32 %res, i32* %out.addr, align 4
+  %i.next = add i32 %i, 1
+  %done = icmp eq i32 %i.next, 1024
+  br i1 %done, label %end, label %loop
+; Exit block, reached once %i.next equals 1024 (i.e. after 1024 iterations).
+end:
+  ret void
+}
author	Tim Northover <tnorthover@apple.com>	2018-09-13 10:28:05 +0000
committer	Tim Northover <tnorthover@apple.com>	2018-09-13 10:28:05 +0000
commit	c15d47bb013e975da582c8fd786ba8234d70d75d (patch)
tree	e13262451793600a29c0df26342fc954e1a8a79a /llvm/test
parent	95ac65bc32180744cbc67d4e82a0f6417fb92aa9 (diff)
download	bcm5719-llvm-c15d47bb013e975da582c8fd786ba8234d70d75d.tar.gz bcm5719-llvm-c15d47bb013e975da582c8fd786ba8234d70d75d.zip