diff options
author | Tim Northover <tnorthover@apple.com> | 2014-11-05 00:27:13 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-11-05 00:27:13 +0000 |
commit | 228c943f316eea630dfb94e270af7e342bd5dd56 (patch) | |
tree | b8616cc8616979c8b30d4511a3b505699cbf28e0 /llvm/test/CodeGen/ARM/dwarf-unwind.ll | |
parent | 445b0657a50fbeefcdcd954f4779c8ae933e8d75 (diff) | |
download | bcm5719-llvm-228c943f316eea630dfb94e270af7e342bd5dd56.tar.gz bcm5719-llvm-228c943f316eea630dfb94e270af7e342bd5dd56.zip |
ARM/Dwarf: correctly align stack before callee-saved VPRs
We were making an attempt to do this by adding an extra callee-saved GPR (so
that there was an even number in the list), but when that failed we went ahead
and pushed anyway.
This had a couple of potential issues:
+ The .cfi directives we emitted misplaced dN, because they were based on
PrologEpilogInserter's calculation rather than on where the registers were actually pushed.
+ Unaligned stores can be less efficient.
+ Unaligned stores can actually fault (likely only an issue in niche cases,
but possible).
This adds a final explicit stack adjustment if all other options fail, so that
the actual locations of the registers match up with where they should be.
llvm-svn: 221320
Diffstat (limited to 'llvm/test/CodeGen/ARM/dwarf-unwind.ll')
-rw-r--r-- | llvm/test/CodeGen/ARM/dwarf-unwind.ll | 68 |
1 file changed, 68 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/dwarf-unwind.ll b/llvm/test/CodeGen/ARM/dwarf-unwind.ll new file mode 100644 index 00000000000..58f486d76ab --- /dev/null +++ b/llvm/test/CodeGen/ARM/dwarf-unwind.ll @@ -0,0 +1,68 @@ +; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s +declare void @bar() + +; ARM's frame lowering attempts to tack another callee-saved register onto the +; list when it detects a potential misaligned VFP store. However, if there are +; none available it used to just vpush anyway and misreport the location of the +; registers in unwind info. Since there are benefits to aligned stores, it's +; better to correct the code than the .cfi_offset directive. + +define void @test_dpr_align(i8 %l, i8 %r) { +; CHECK-LABEL: test_dpr_align: +; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK: sub sp, #4 +; CHECK: vpush {d8} +; CHECK: .cfi_offset d8, -48 +; CHECK-NOT: sub sp +; [...] +; CHECK: bl bar +; CHECK-NOT: add sp +; CHECK: vpop {d8} +; CHECK: add sp, #4 +; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"() + call void @bar() + ret void +} + +; The prologue (but not the epilogue) can be made more space efficient by +; chucking an argument register into the list. Not worth it in general though, +; "sub sp, #4" is likely faster. +define void @test_dpr_align_tiny(i8 %l, i8 %r) minsize { +; CHECK-LABEL: test_dpr_align_tiny: +; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp +; CHECK: vpush {d8} +; CHECK: .cfi_offset d8, -48 +; CHECK-NOT: sub sp +; [...] +; CHECK: bl bar +; CHECK-NOT: add sp +; CHECK: vpop {d8} +; CHECK: add sp, #4 +; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"() + call void @bar() + ret void +} + + +; However, we shouldn't do a 2-step align/adjust if there are no DPRs to be +; saved. 
+define void @test_nodpr_noalign(i8 %l, i8 %r) { +; CHECK-LABEL: test_nodpr_noalign: +; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NOT: sub sp +; CHECK: sub sp, #12 +; CHECK-NOT: sub sp +; [...] +; CHECK: bl bar +; CHECK-NOT: add sp +; CHECK: add sp, #12 +; CHECK-NOT: add sp +; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + alloca i64 + call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() + call void @bar() + ret void +} |