author     Tim Northover <tnorthover@apple.com>  2015-10-28 22:46:43 +0000
committer  Tim Northover <tnorthover@apple.com>  2015-10-28 22:46:43 +0000
commit     e0ccdc6de93e879723c3b1af658913cbc0b8fbdc (patch)
tree       7caa8763e15e0633a44465e9bb7658d9ff1d1f97 /llvm/test/CodeGen/ARM
parent     2d4d1615197efeb044f62d6a8721704839cd5337 (diff)
ARM: add backend support for the ABI used in WatchOS
At the LLVM level this ABI is essentially a minimal modification of AAPCS to support 16-byte alignment for vector types and the stack.

llvm-svn: 251570
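As a rough sketch of the observable effect (not part of this commit; the names @vector_slot_sketch and @use_v128 are hypothetical), a 128-bit vector stack slot is expected to land on a 16-byte boundary under the v7k triple, where AAPCS proper only gives vectors 8-byte alignment:

  ; Sketch: compile with llc -mtriple=thumbv7k-apple-watchos2.0 and inspect the
  ; prologue; the alloca below should get a 16-byte aligned stack slot.
  define void @vector_slot_sketch() {
    %slot = alloca <4 x float>
    call void @use_v128(<4 x float>* %slot)  ; opaque use so the slot survives
    ret void
  }
  declare void @use_v128(<4 x float>*)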
Diffstat (limited to 'llvm/test/CodeGen/ARM')
-rw-r--r--  llvm/test/CodeGen/ARM/v7k-abi-align.ll  146
1 file changed, 146 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/ARM/v7k-abi-align.ll b/llvm/test/CodeGen/ARM/v7k-abi-align.ll
new file mode 100644
index 00000000000..f666efc2db9
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/v7k-abi-align.ll
@@ -0,0 +1,146 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
+
+%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
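+; Expected layout under the v7k ABI (a hand derivation, not tool output):
+; i8 at 0, i64 at 8, i8 at 16, double at 24, i8 at 32, <2 x float> at 40,
+; i8 at 48 and, thanks to the new 16-byte vector alignment, <4 x float> at 64.
+; The functions below check exactly these offsets.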
+
+define i32 @test_i64_align() {
+; CHECK-LABEL: test_i64_align:
+; CHECK: movs r0, #8
+ ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
+}
+
+define i32 @test_f64_align() {
+; CHECK-LABEL: test_f64_align:
+; CHECK: movs r0, #24
+ ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
+}
+
+define i32 @test_v2f32_align() {
+; CHECK-LABEL: test_v2f32_align:
+; CHECK: movs r0, #40
+ ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
+}
+
+define i32 @test_v4f32_align() {
+; CHECK-LABEL: test_v4f32_align:
+; CHECK: movs r0, #64
+ ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
+}
+
+; The key point here is that an extra register has to be saved so that the
+; DPRs end up in an aligned location (as the prologue/epilogue inserter
+; calculated).
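+; (Derivation: only r6, d8 and d9 are clobbered here, so a bare
+; push {r6, r7, lr} would adjust sp by 12 bytes and leave the DPR slots
+; misaligned; padding the push with r5 makes it 16 bytes, so d8/d9 land
+; 8-byte aligned with no sub sp needed.)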
+define void @test_dpr_unwind_align() {
+; CHECK-LABEL: test_dpr_unwind_align:
+; CHECK: push {r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8, d9}
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK-NOT: add sp,
+; CHECK: pop {r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r6},~{d8},~{d9}"()
+
+ ; Any call will do; it just gives the CHECK lines a bl to anchor on.
+ call i32 @test_i64_align()
+ ret void
+}
+
+; This time, there's no viable way to tack CS-registers onto the list: a real SP
+; adjustment needs to be performed to put d8 and d9 where they should be.
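+; (Derivation: push {r4, r5, r6, r7, lr} is 20 bytes and push.w {r8, r11}
+; another 8, 28 in total, with no spare callee-saved register left to pad
+; with, so an explicit sub sp, #4 is needed to round the frame to 32 bytes
+; before the vpush.)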
+define void @test_dpr_unwind_align_manually() {
+; CHECK-LABEL: test_dpr_unwind_align_manually:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: push.w {r8, r11}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop.w {r8, r11}
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()
+
+ ; Any call will do; it just gives the CHECK lines a bl to anchor on.
+ call i32 @test_i64_align()
+ ret void
+}
+
+; If there's only a CS1 area, the sub should be in the right place:
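+; (Derivation: push {r4, r5, r6, r7, lr} is 20 bytes, so the sub sp, #4 has
+; to sit between the push and the vpush to realign the DPR slots; the later
+; sub sp, #8 only restores 16-byte stack alignment for the call.)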
+define void @test_dpr_unwind_align_just_cs1() {
+; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; CHECK: sub sp, #8
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #8
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()
+
+ ; Any call will do; it just gives the CHECK lines a bl to anchor on.
+ call i32 @test_i64_align()
+ ret void
+}
+
+; If there are no DPRs, we shouldn't try to align the stack in stages anyway
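+; (Derivation: with no DPR spills there is nothing to realign mid-prologue;
+; a single sub sp, #12 after the 20-byte push takes the frame straight to 32
+; bytes, 16-byte aligned for the call.)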
+define void @test_dpr_unwind_align_no_dprs() {
+; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #12
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #12
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
+
+ ; Any call will do; it just gives the CHECK lines a bl to anchor on.
+ call i32 @test_i64_align()
+ ret void
+}
+
+; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
+; the stack.
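+; (Derivation: the [8 x double] fills d0-d7, i.e. all of s0-s15, so the float
+; is passed at [sp] and the <4 x float> is rounded up to the next 16-byte
+; boundary at [sp, #16]; the :128 qualifier on the vld1 asserts that
+; alignment.)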
+define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]
+
+ ret <4 x float> %in
+}
+
+declare void @varargs(i32, ...)
+
+; When varargs are enabled, we go down a different route. Still want 128-bit
+; alignment though.
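+; (Derivation: the variadic call uses the core convention, so i32 plus
+; [3 x i32] fill r0-r3, the float lands at [sp], and the vector is rounded up
+; to [sp, #16]; %in itself arrives in d0/d1 because this function's own
+; signature is not variadic, hence the vst1.64 store.)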
+define void @test_v128_stack_pass_varargs(<4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass_varargs:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]
+
+ call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
+ ret void
+}
+
+; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP,
+; give a single pointer), 64-bit quantities must be passed in an even/odd
+; register pair, even if that leaves a gap and pushes later arguments onto
+; the stack.
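+; (Derivation: the first i32 goes in r0, r1 is skipped so the i64 can use the
+; even/odd pair r2/r3, and %sp is passed at [sp]; the adds/adc pair below then
+; performs the 64-bit add with carry.)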
+define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
+; CHECK-LABEL: test_64bit_gpr_align:
+; CHECK: ldr [[RHS:r[0-9]+]], [sp]
+; CHECK: adds r0, [[RHS]], r2
+; CHECK: adc r1, r3, #0
+
+ %ext = zext i32 %sp to i64
+ %sum = add i64 %ext, %r2_r3
+ ret i64 %sum
+}