diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 1 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMFastISel.cpp | 1 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/cxx-tlscc.ll | 106 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/cxx-tlscc.ll | 50 |
| -rw-r--r-- | llvm/test/CodeGen/X86/cxx_tlscc64.ll | 63 |
6 files changed, 221 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1a0c902d907..8c0ff29ad77 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -473,7 +473,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // We split CSR if the target supports it for the given function // and the function has only return exits. - if (TLI->supportSplitCSR(MF)) { + if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { FuncInfo->SplitCSR = true; // Collect all the return blocks. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 86d3bb0c761..86b006da75d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2469,6 +2469,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::C: case CallingConv::Fast: case CallingConv::PreserveMost: + case CallingConv::CXX_FAST_TLS: if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 088638b10f7..361552568b6 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1847,6 +1847,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, } // Fallthrough case CallingConv::C: + case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. 
if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && diff --git a/llvm/test/CodeGen/AArch64/cxx-tlscc.ll b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll index 9996c0d3aba..9219132d1bf 100644 --- a/llvm/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll @@ -3,6 +3,8 @@ ; Shrink wrapping currently does not kick in because we have a TLS CALL ; in the entry block and it will clobber the link register. +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s + %struct.S = type { i8 } @sg = internal thread_local global %struct.S zeroinitializer, align 1 @@ -76,6 +78,52 @@ __tls_init.exit: ; CHECK-NOT: ldp d29, d28 ; CHECK-NOT: ldp d31, d30 +; CHECK-O0-LABEL: _ZTW2sg +; CHECK-O0: stp d31, d30 +; CHECK-O0: stp d29, d28 +; CHECK-O0: stp d27, d26 +; CHECK-O0: stp d25, d24 +; CHECK-O0: stp d23, d22 +; CHECK-O0: stp d21, d20 +; CHECK-O0: stp d19, d18 +; CHECK-O0: stp d17, d16 +; CHECK-O0: stp d7, d6 +; CHECK-O0: stp d5, d4 +; CHECK-O0: stp d3, d2 +; CHECK-O0: stp d1, d0 +; CHECK-O0: stp x14, x13 +; CHECK-O0: stp x12, x11 +; CHECK-O0: stp x10, x9 +; CHECK-O0: stp x8, x7 +; CHECK-O0: stp x6, x5 +; CHECK-O0: stp x4, x3 +; CHECK-O0: stp x2, x1 +; CHECK-O0: blr +; CHECK-O0: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK-O0: blr +; CHECK-O0: tlv_atexit +; CHECK-O0: [[BB_end]]: +; CHECK-O0: blr +; CHECK-O0: ldp x2, x1 +; CHECK-O0: ldp x4, x3 +; CHECK-O0: ldp x6, x5 +; CHECK-O0: ldp x8, x7 +; CHECK-O0: ldp x10, x9 +; CHECK-O0: ldp x12, x11 +; CHECK-O0: ldp x14, x13 +; CHECK-O0: ldp d1, d0 +; CHECK-O0: ldp d3, d2 +; CHECK-O0: ldp d5, d4 +; CHECK-O0: ldp d7, d6 +; CHECK-O0: ldp d17, d16 +; CHECK-O0: ldp d19, d18 +; CHECK-O0: ldp d21, d20 +; CHECK-O0: ldp d23, d22 +; CHECK-O0: ldp d25, d24 +; CHECK-O0: ldp d27, d26 +; CHECK-O0: ldp d29, d28 +; CHECK-O0: ldp d31, d30 + ; CHECK-LABEL: _ZTW4sum1 ; CHECK-NOT: stp d31, d30 ; CHECK-NOT: stp d29, d28 @@ -98,6 +146,64 @@ __tls_init.exit: ; CHECK-NOT: stp x4, x3 ; CHECK-NOT: stp x2, 
x1 ; CHECK: blr + +; CHECK-O0-LABEL: _ZTW4sum1 +; CHECK-O0-NOT: vstr +; CHECK-O0-NOT: vldr define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { ret i32* @sum1 } + +; Make sure at O0, we don't generate spilling/reloading of the CSRs. +; CHECK-O0-LABEL: tls_test2 +; CHECK-O0-NOT: stp d31, d30 +; CHECK-O0-NOT: stp d29, d28 +; CHECK-O0-NOT: stp d27, d26 +; CHECK-O0-NOT: stp d25, d24 +; CHECK-O0-NOT: stp d23, d22 +; CHECK-O0-NOT: stp d21, d20 +; CHECK-O0-NOT: stp d19, d18 +; CHECK-O0-NOT: stp d17, d16 +; CHECK-O0-NOT: stp d7, d6 +; CHECK-O0-NOT: stp d5, d4 +; CHECK-O0-NOT: stp d3, d2 +; CHECK-O0-NOT: stp d1, d0 +; CHECK-O0-NOT: stp x20, x19 +; CHECK-O0-NOT: stp x14, x13 +; CHECK-O0-NOT: stp x12, x11 +; CHECK-O0-NOT: stp x10, x9 +; CHECK-O0-NOT: stp x8, x7 +; CHECK-O0-NOT: stp x6, x5 +; CHECK-O0-NOT: stp x4, x3 +; CHECK-O0-NOT: stp x2, x1 +; CHECK-O0: bl {{.*}}tls_helper +; CHECK-O0-NOT: ldp x2, x1 +; CHECK-O0-NOT: ldp x4, x3 +; CHECK-O0-NOT: ldp x6, x5 +; CHECK-O0-NOT: ldp x8, x7 +; CHECK-O0-NOT: ldp x10, x9 +; CHECK-O0-NOT: ldp x12, x11 +; CHECK-O0-NOT: ldp x14, x13 +; CHECK-O0-NOT: ldp x20, x19 +; CHECK-O0-NOT: ldp d1, d0 +; CHECK-O0-NOT: ldp d3, d2 +; CHECK-O0-NOT: ldp d5, d4 +; CHECK-O0-NOT: ldp d7, d6 +; CHECK-O0-NOT: ldp d17, d16 +; CHECK-O0-NOT: ldp d19, d18 +; CHECK-O0-NOT: ldp d21, d20 +; CHECK-O0-NOT: ldp d23, d22 +; CHECK-O0-NOT: ldp d25, d24 +; CHECK-O0-NOT: ldp d27, d26 +; CHECK-O0-NOT: ldp d29, d28 +; CHECK-O0-NOT: ldp d31, d30 +; CHECK-O0: ret +%class.C = type { i32 } +@tC = internal thread_local global %class.C zeroinitializer, align 4 +declare cxx_fast_tlscc void @tls_helper() +define cxx_fast_tlscc %class.C* @tls_test2() #1 { + call cxx_fast_tlscc void @tls_helper() + ret %class.C* @tC +} +attributes #0 = { nounwind "no-frame-pointer-elim"="true" } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/ARM/cxx-tlscc.ll b/llvm/test/CodeGen/ARM/cxx-tlscc.ll index 11173bbb197..e8e3b6d1973 100644 --- a/llvm/test/CodeGen/ARM/cxx-tlscc.ll +++ 
b/llvm/test/CodeGen/ARM/cxx-tlscc.ll @@ -3,6 +3,9 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios8.0 | FileCheck %s ; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; RUN: llc < %s -mtriple=armv7k-apple-watchos2.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=WATCH-O0 %s +; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -O0 | FileCheck --check-prefix=CHECK-O0 --check-prefix=IOS-O0 %s + %struct.S = type { i8 } @sg = internal thread_local global %struct.S zeroinitializer, align 1 @@ -10,6 +13,9 @@ @__tls_guard = internal thread_local unnamed_addr global i1 false @sum1 = internal thread_local global i32 0, align 4 +%class.C = type { i32 } +@tC = internal thread_local global %class.C zeroinitializer, align 4 + declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) @@ -36,7 +42,7 @@ __tls_init.exit: ; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7} ; CHECK: blx ; CHECK: bne [[BB_end:.?LBB0_[0-9]+]] -; CHECK; blx +; CHECK: blx ; CHECK: tlv_atexit ; CHECK: [[BB_end]]: ; CHECK: blx @@ -46,12 +52,54 @@ __tls_init.exit: ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc} ; CHECK: pop {lr} +; CHECK-O0-LABEL: _ZTW2sg +; WATCH-O0: push {r1, r2, r3, r6, r7, lr} +; IOS-O0: push {r1, r2, r3, r7, lr} +; CHECK-O0: push {r9, r12} +; CHECK-O0: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +; CHECK-O0: vpush {d0, d1, d2, d3, d4, d5, d6, d7} +; CHECK-O0: blx +; CHECK-O0: bne [[BB_end:.?LBB0_[0-9]+]] +; CHECK-O0: blx +; CHECK-O0: tlv_atexit +; CHECK-O0: [[BB_end]]: +; CHECK-O0: blx +; CHECK-O0: vpop {d0, d1, d2, d3, d4, d5, d6, d7} +; CHECK-O0: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +; CHECK-O0: pop {r9, r12} +; WATCH-O0: pop {r1, r2, r3, r6, r7, pc} +; IOS-O0: pop {r1, r2, r3, r7, pc} + ; CHECK-LABEL: _ZTW4sum1 ; CHECK-NOT: push {r1, r2, r3, r4, r7, lr} ; 
CHECK-NOT: push {r9, r12} ; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} ; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7} ; CHECK: blx + +; CHECK-O0-LABEL: _ZTW4sum1 +; CHECK-O0-NOT: vpush +; CHECK-O0-NOT: vstr +; CHECK-O0-NOT: vpop +; CHECK-O0-NOT: vldr +; CHECK-O0: pop define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { ret i32* @sum1 } + +; Make sure at O0, we don't generate spilling/reloading of the CSRs. +; CHECK-O0-LABEL: tls_test2 +; CHECK-O0: push +; CHECK-O0-NOT: vpush +; CHECK-O0-NOT: vstr +; CHECK-O0: tls_helper +; CHECK-O0-NOT: vpop +; CHECK-O0-NOT: vldr +; CHECK-O0: pop +declare cxx_fast_tlscc void @tls_helper() +define cxx_fast_tlscc %class.C* @tls_test2() #1 { + call cxx_fast_tlscc void @tls_helper() + ret %class.C* @tC +} +attributes #0 = { nounwind "no-frame-pointer-elim"="true" } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/X86/cxx_tlscc64.ll b/llvm/test/CodeGen/X86/cxx_tlscc64.ll index 6c8e45e42d1..149384549e9 100644 --- a/llvm/test/CodeGen/X86/cxx_tlscc64.ll +++ b/llvm/test/CodeGen/X86/cxx_tlscc64.ll @@ -39,6 +39,27 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) ; CHECK-NOT: popq %r9 ; CHECK-NOT: popq %r10 ; CHECK-NOT: popq %r11 + +; CHECK-O0-LABEL: _ZTW2sg +; CHECK-O0: pushq %r11 +; CHECK-O0: pushq %r10 +; CHECK-O0: pushq %r9 +; CHECK-O0: pushq %r8 +; CHECK-O0: pushq %rsi +; CHECK-O0: pushq %rdx +; CHECK-O0: pushq %rcx +; CHECK-O0: callq +; CHECK-O0: jne +; CHECK-O0: callq +; CHECK-O0: tlv_atexit +; CHECK-O0: callq +; CHECK-O0: popq %rcx +; CHECK-O0: popq %rdx +; CHECK-O0: popq %rsi +; CHECK-O0: popq %r8 +; CHECK-O0: popq %r9 +; CHECK-O0: popq %r10 +; CHECK-O0: popq %r11 define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind { %.b.i = load i1, i1* @__tls_guard, align 1 br i1 %.b.i, label %__tls_init.exit, label %init.i @@ -63,6 +84,24 @@ __tls_init.exit: ; CHECK-NOT: pushq %rcx ; CHECK-NOT: pushq %rbx ; CHECK: callq +; CHECK-O0-LABEL: _ZTW4sum1 +; 
CHECK-O0-NOT: pushq %r11 +; CHECK-O0-NOT: pushq %r10 +; CHECK-O0-NOT: pushq %r9 +; CHECK-O0-NOT: pushq %r8 +; CHECK-O0-NOT: pushq %rsi +; CHECK-O0-NOT: pushq %rdx +; CHECK-O0-NOT: pushq %rcx +; CHECK-O0-NOT: pushq %rbx +; CHECK-O0-NOT: movq %r11 +; CHECK-O0-NOT: movq %r10 +; CHECK-O0-NOT: movq %r9 +; CHECK-O0-NOT: movq %r8 +; CHECK-O0-NOT: movq %rsi +; CHECK-O0-NOT: movq %rdx +; CHECK-O0-NOT: movq %rcx +; CHECK-O0-NOT: movq %rbx +; CHECK-O0: callq define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { ret i32* @sum1 } @@ -76,4 +115,28 @@ define cxx_fast_tlscc i32* @_ZTW4sum2() #0 { ret i32* @sum1 } +; Make sure at O0, we don't generate spilling/reloading of the CSRs. +; CHECK-O0-LABEL: tls_test2 +; CHECK-O0-NOT: pushq %r11 +; CHECK-O0-NOT: pushq %r10 +; CHECK-O0-NOT: pushq %r9 +; CHECK-O0-NOT: pushq %r8 +; CHECK-O0-NOT: pushq %rsi +; CHECK-O0-NOT: pushq %rdx +; CHECK-O0: callq {{.*}}tls_helper +; CHECK-O0-NOT: popq %rdx +; CHECK-O0-NOT: popq %rsi +; CHECK-O0-NOT: popq %r8 +; CHECK-O0-NOT: popq %r9 +; CHECK-O0-NOT: popq %r10 +; CHECK-O0-NOT: popq %r11 +; CHECK-O0: ret +%class.C = type { i32 } +@tC = internal thread_local global %class.C zeroinitializer, align 4 +declare cxx_fast_tlscc void @tls_helper() +define cxx_fast_tlscc %class.C* @tls_test2() #1 { + call cxx_fast_tlscc void @tls_helper() + ret %class.C* @tC +} attributes #0 = { nounwind "no-frame-pointer-elim"="true" } +attributes #1 = { nounwind } |

