diff options
author | Artyom Skrobov <Artyom.Skrobov@arm.com> | 2015-12-08 19:59:01 +0000 |
---|---|---|
committer | Artyom Skrobov <Artyom.Skrobov@arm.com> | 2015-12-08 19:59:01 +0000 |
commit | 0a37b80bcb72da01942b7a82326b31f6fce4d2af (patch) | |
tree | d418057931189e00ba23ac4faf15929686890906 | |
parent | 4604ece66bccc0b6dc6bd6c7145403ac543fa120 (diff) | |
download | bcm5719-llvm-0a37b80bcb72da01942b7a82326b31f6fce4d2af.tar.gz bcm5719-llvm-0a37b80bcb72da01942b7a82326b31f6fce4d2af.zip |
Fix ARMv4T (Thumb1) epilogue generation
Summary:
Before ARMv5T, Thumb1 code could not pop PC (as described in D14357 and D14986),
so the epilogue needs the special fixup.
Reviewers: jroelofs, qcolombet
Subscribers: aemerson, llvm-commits, rengolin
Differential Revision: http://reviews.llvm.org/D15126
llvm-svn: 255047
-rw-r--r-- | llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 41 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/debug-frame.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb/large-stack.ll | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb/pop-special-fixup.ll | 60 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll | 102 |
5 files changed, 132 insertions, 95 deletions
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index fd96af6cb6e..8771c68e593 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -406,9 +406,6 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { if (AFI->getArgRegsSaveSize()) return true; - // FIXME: this doesn't make sense, and the following patch will remove it. - if (!STI.hasV4TOps()) return false; - // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) @@ -532,10 +529,32 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Kill)); } + bool AddBx = false; if (MBBI == MBB.end()) { MachineInstr& Pop = MBB.back(); assert(Pop.getOpcode() == ARM::tPOP); Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR)); + } else if (MBBI->getOpcode() == ARM::tPOP_RET) { + // We couldn't use the direct restoration above, so + // perform the opposite conversion: tPOP_RET to tPOP. + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + unsigned Popped = 0; + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::PC) { + MIB.addOperand(MO); + if (!MO.isImplicit()) + Popped++; + } + // Is there anything left to pop? + if (!Popped) + MBB.erase(MIB.getInstr()); + // Erase the old instruction. 
+ MBB.erase(MBBI); + MBBI = MBB.end(); + AddBx = true; } assert(PopReg && "Do not know how to get LR"); @@ -554,14 +573,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); - + if (AddBx && !TemporaryReg) { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill)); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } if (TemporaryReg) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); + if (AddBx) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET))); } return true; @@ -628,7 +653,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue - if (STI.hasV4TOps() && !STI.hasV5TOps()) + if (!STI.hasV5TOps()) continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); diff --git a/llvm/test/CodeGen/ARM/debug-frame.ll b/llvm/test/CodeGen/ARM/debug-frame.ll index 33d38969899..4bd401b6849 100644 --- a/llvm/test/CodeGen/ARM/debug-frame.ll +++ b/llvm/test/CodeGen/ARM/debug-frame.ll @@ -30,11 +30,11 @@ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -disable-fp-elim -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll index 0d534589ae0..c5d1044e9d6 100644 --- 
a/llvm/test/CodeGen/Thumb/large-stack.ll +++ b/llvm/test/CodeGen/Thumb/large-stack.ll @@ -32,10 +32,10 @@ define void @test100() { ; Smallest stack for which we use a constant pool define void @test2() { ; CHECK-LABEL: test2: -; CHECK: ldr r0, -; CHECK: add sp, r0 -; EABI: ldr r0, -; EABI: add sp, r0 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 @@ -44,12 +44,12 @@ define void @test2() { define i32 @test3() { ; CHECK-LABEL: test3: -; CHECK: ldr r1, -; CHECK: add sp, r1 -; CHECK: ldr r1, -; CHECK: add r1, sp -; EABI: ldr r1, -; EABI: add sp, r1 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: ldr [[TEMP]], +; CHECK: add [[TEMP]], sp +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %retval = alloca i32, align 4 diff --git a/llvm/test/CodeGen/Thumb/pop-special-fixup.ll b/llvm/test/CodeGen/Thumb/pop-special-fixup.ll deleted file mode 100644 index 9ba589d6cec..00000000000 --- a/llvm/test/CodeGen/Thumb/pop-special-fixup.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: llc %s -enable-shrink-wrap=true -o - | FileCheck %s - -target triple = "thumbv6m-none-none-eabi" - -@retval = global i32 0, align 4 - -define i32 @test(i32 %i, i32 %argc, i8** nocapture readonly %argv) { - %1 = icmp sgt i32 %argc, %i - br i1 %1, label %2, label %19 - - %3 = getelementptr inbounds i8*, i8** %argv, i32 %i - %4 = load i8*, i8** %3, align 4 - %5 = load i8, i8* %4, align 1 - %6 = icmp eq i8 %5, 45 - %7 = getelementptr inbounds i8, i8* %4, i32 1 - %. 
= select i1 %6, i8* %7, i8* %4 - %.1 = select i1 %6, i32 -1, i32 1 - %8 = load i8, i8* %., align 1 - %.off2 = add i8 %8, -48 - %9 = icmp ult i8 %.off2, 10 - %.pre = load i32, i32* @retval, align 4 - br i1 %9, label %.lr.ph.preheader, label %.critedge - -.lr.ph.preheader: ; preds = %2 - br label %.lr.ph - -.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph - %10 = phi i32 [ %14, %.lr.ph ], [ %.pre, %.lr.ph.preheader ] - %11 = phi i8 [ %15, %.lr.ph ], [ %8, %.lr.ph.preheader ] - %valstring.03 = phi i8* [ %13, %.lr.ph ], [ %., %.lr.ph.preheader ] - %12 = zext i8 %11 to i32 - %13 = getelementptr inbounds i8, i8* %valstring.03, i32 1 - %14 = add nsw i32 %10, %12 - store i32 %14, i32* @retval, align 4 - %15 = load i8, i8* %13, align 1 - %.off = add i8 %15, -48 - %16 = icmp ult i8 %.off, 10 - br i1 %16, label %.lr.ph, label %.critedge.loopexit - -.critedge.loopexit: ; preds = %.lr.ph - %.lcssa = phi i32 [ %14, %.lr.ph ] - br label %.critedge - -.critedge: ; preds = %.critedge.loopexit, %2 - %17 = phi i32 [ %.pre, %2 ], [ %.lcssa, %.critedge.loopexit ] - %18 = mul nsw i32 %17, %.1 - store i32 %18, i32* @retval, align 4 - br label %19 - -; <label>:19 ; preds = %.critedge, %0 - ret i32 0 -} - -; CHECK: push {r4, r5, r7, lr} -; CHECK: pop {r4, r5, r7} -; CHECK: pop {r0} -; CHECK: mov lr, r0 -; CHECK: movs r0, #0 -; CHECK: bx lr - diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll index 09c2ae3b4f7..e68ca0bd78c 100644 --- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -1,7 +1,11 @@ ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \ +; RUN: | FileCheck 
%s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \ +; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T ; ; Note: Lots of tests use inline asm instead of regular calls. ; This allows to have a better control on what the allocation will do. @@ -39,14 +43,20 @@ ; ; With shrink-wrapping, epilogue is just after the call. ; ENABLE-NEXT: add sp, #8 -; ENABLE-NEXT: pop {r7, lr} +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: mov lr, r1 ; ; CHECK: [[EXIT_LABEL]]: ; ; Without shrink-wrapping, epilogue is in the exit block. ; Epilogue code. (What we pop does not matter.) ; DISABLE: add sp, #8 -; DISABLE-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 ; ; ENABLE-NEXT: bx lr define i32 @foo(i32 %a, i32 %b) { @@ -64,6 +74,42 @@ false: ret i32 %tmp.0 } + +; Same, but the final BB is non-trivial, so we don't duplicate the return inst. +; CHECK-LABEL: bar: +; +; With shrink-wrapping, epilogue is just after the call. +; CHECK: bl +; ENABLE-NEXT: add sp, #8 +; ENABLE-NEXT: pop {r7} +; ENABLE-NEXT: pop {r0} +; ENABLE-NEXT: mov lr, r0 +; +; CHECK: movs r0, #42 +; +; Without shrink-wrapping, epilogue is in the exit block. +; Epilogue code. (What we pop does not matter.) 
+; DISABLE: add sp, #8 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; ENABLE-NEXT: bx lr +define i32 @bar(i32 %a, i32 %b) { + %tmp = alloca i32, align 4 + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) + br label %false + +false: + ret i32 42 +} + ; Function Attrs: optsize declare i32 @doSomething(i32, i32*) @@ -101,12 +147,17 @@ declare i32 @doSomething(i32, i32*) ; CHECK: lsls [[SUM]], [[SUM]], #3 ; ; Duplicated epilogue. -; DISABLE: pop {r4, pc} +; DISABLE-V5T: pop {r4, pc} +; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] ; ; CHECK: [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. ; CHECK: lsls r0, r1, #1 -; DISABLE-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 ; ; ENABLE-NEXT: bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { @@ -222,12 +273,17 @@ for.end: ; preds = %for.body ; ENABLE-NEXT: pop {r4, lr} ; ; Duplicated epilogue. -; DISABLE: pop {r4, pc} +; DISABLE-V5T: pop {r4, pc} +; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] ; ; CHECK: [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. ; CHECK: lsls r0, r1, #1 -; DISABLE-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 ; ; ENABLE-NEXT: bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { @@ -297,12 +353,17 @@ declare void @somethingElse(...) ; ENABLE: pop {r4, lr} ; ; Duplicated epilogue. 
-; DISABLE: pop {r4, pc} +; DISABLE-V5T: pop {r4, pc} +; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] ; ; CHECK: [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. ; CHECK: lsls r0, r1, #1 -; DISABLE-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 ; ; ENABLE-NEXT: bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { @@ -373,12 +434,17 @@ entry: ; ENABLE-NEXT: pop {r4, lr} ; ; Duplicated epilogue. -; DISABLE-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] ; ; CHECK: [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. ; CHECK: lsls r0, r1, #1 -; DISABLE-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 ; ; ENABLE-NEXT: bx lr define i32 @inlineAsm(i32 %cond, i32 %N) { @@ -438,12 +504,14 @@ if.end: ; preds = %for.body, %if.else ; CHECK-NEXT: pop {r3} ; CHECK-NEXT: bl ; CHECK-NEXT: lsls r0, r0, #3 -; CHECK-NEXT: add sp, #16 ; +; ENABLE-NEXT: add sp, #16 ; ENABLE-NEXT: pop {[[TMP]], lr} ; ; Duplicated epilogue. -; DISABLE-NEXT: pop {[[TMP]], pc} +; DISABLE-V5T-NEXT: add sp, #16 +; DISABLE-V5T-NEXT: pop {[[TMP]], pc} +; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] ; ; CHECK: [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. @@ -452,8 +520,12 @@ if.end: ; preds = %for.body, %if.else ; Epilogue code. 
; ENABLE-NEXT: bx lr ; +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end ; DISABLE-NEXT: add sp, #16 -; DISABLE-NEXT: pop {[[TMP]], pc} +; DISABLE-V5T-NEXT: pop {[[TMP]], pc} +; DISABLE-V4T-NEXT: pop {[[TMP]]} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 define i32 @callVariadicFunc(i32 %cond, i32 %N) { entry: %tobool = icmp eq i32 %cond, 0 |