-rw-r--r--  llvm/include/llvm/CodeGen/TargetRegisterInfo.h    8
-rw-r--r--  llvm/lib/CodeGen/RegUsageInfoCollector.cpp        7
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp       6
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.h         3
-rw-r--r--  llvm/lib/Target/ARM/ARMFrameLowering.cpp          3
-rw-r--r--  llvm/test/CodeGen/ARM/ipra.ll                   202
6 files changed, 227 insertions, 2 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 8c98d626b6d..deb40291a8f 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -447,6 +447,14 @@ public:
llvm_unreachable("target does not provide no preserved mask");
}
+ /// Return a list of all of the registers which are clobbered "inside" a call
+ /// to the given function. For example, these might be needed for PLT
+ /// sequences or long-branch veneers.
+ virtual ArrayRef<MCPhysReg>
+ getIntraCallClobberedRegs(const MachineFunction *MF) const {
+ return {};
+ }
+
/// Return true if all bits that are set in mask \p mask0 are also set in
/// \p mask1.
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;
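A target opts in by overriding this hook and returning its fixed list of intra-call clobbered registers; since the hook returns an ArrayRef, the backing storage must outlive the call, so a static array is the natural choice. A minimal sketch for a hypothetical target (the FooRegisterInfo class and Foo::SCRATCH register are illustrative names, not part of this patch):

// Hypothetical target override; mirrors the pattern the ARM
// implementation later in this patch uses for r12.
ArrayRef<MCPhysReg>
FooRegisterInfo::getIntraCallClobberedRegs(const MachineFunction *MF) const {
  // Static storage, so the returned ArrayRef stays valid after we return.
  static const MCPhysReg IntraCallClobberedRegs[] = {Foo::SCRATCH};
  return IntraCallClobberedRegs;
}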
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index b37dfada710..64552b58e2e 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
};
+
+ // Some targets can clobber registers "inside" a call, typically in
+ // linker-generated code.
+ for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
// FIXME: Rewrite to use regunits.
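The bit twiddling in SetRegAsDefined relies on LLVM's regmask convention: a set bit means the register is preserved across the call, so marking a register as clobbered (defined) means clearing its bit. A self-contained sketch of that convention (standalone illustration, not part of this patch):

#include <cstdint>
#include <vector>

// A regmask holds one bit per physical register, packed into 32-bit words.
// Set bit => preserved across the call; cleared bit => clobbered.
void setRegAsClobbered(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}

bool isRegPreserved(const std::vector<uint32_t> &RegMask, unsigned Reg) {
  return (RegMask[Reg / 32] >> (Reg % 32)) & 1u;
}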
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e63ea7a4447..b131227d49e 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -174,6 +174,12 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
: CSR_AAPCS_ThisReturn_RegMask;
}
+ArrayRef<MCPhysReg> ARMBaseRegisterInfo::getIntraCallClobberedRegs(
+ const MachineFunction *MF) const {
+ static MCPhysReg IntraCallClobberedRegs[] = {ARM::R12};
+ return IntraCallClobberedRegs;
+}
+
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
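On ARM, r12 (ip) is the AAPCS intra-procedure-call scratch register: linker-generated code such as PLT entries and long-branch veneers may clobber it between the call site and the callee, so it can never be treated as preserved even by a callee that provably never touches it. A sketch of how client code might query the new hook (the mayBeClobberedInsideCall wrapper is illustrative; regsOverlap is the existing TargetRegisterInfo helper):

// Returns true if Reg, or any register aliasing it, may be clobbered
// "inside" calls on this target, even when the callee never touches it.
bool mayBeClobberedInsideCall(const TargetRegisterInfo &TRI,
                              const MachineFunction &MF, MCPhysReg Reg) {
  for (MCPhysReg Clobbered : TRI.getIntraCallClobberedRegs(&MF))
    if (TRI.regsOverlap(Reg, Clobbered))
      return true;
  return false;
}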
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 7e2c72b4d71..37248ac6ecf 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -129,6 +129,9 @@ public:
const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID) const;
+ ArrayRef<MCPhysReg>
+ getIntraCallClobberedRegs(const MachineFunction *MF) const override;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
unsigned PhysReg) const override;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 7ae0c6ab3cf..d2a5111a4de 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2080,9 +2080,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ExtraCSSpill = true;
}
}
- if (!ExtraCSSpill) {
+ if (!ExtraCSSpill && RS) {
// Reserve a slot closest to SP or frame pointer.
- assert(RS && "Register scavenging not provided");
LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
const TargetRegisterClass &RC = ARM::GPRRegClass;
unsigned Size = TRI->getSpillSize(RC);
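Dropping the assert is what makes this function safe to call without a register scavenger: with IPRA enabled, callee-saved-register analysis can reach determineCalleeSaves with no scavenger available, and the code above now simply skips reserving the emergency spill slot in that case. A sketch of such a call site (assumed shape, not quoted from this patch):

// RS defaults to nullptr in TargetFrameLowering::determineCalleeSaves;
// with this change the ARM override tolerates that instead of asserting.
SavedRegs.clear();
TFI.determineCalleeSaves(MF, SavedRegs, /*RS=*/nullptr);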
diff --git a/llvm/test/CodeGen/ARM/ipra.ll b/llvm/test/CodeGen/ARM/ipra.ll
new file mode 100644
index 00000000000..3252c132583
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ipra.ll
@@ -0,0 +1,202 @@
+; RUN: llc -mtriple armv7a--none-eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
+; RUN: llc -mtriple armv7a--none-eabi < %s -enable-ipra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLED
+
+define void @leaf() {
+entry:
+ tail call void asm sideeffect "", ""()
+ ret void
+}
+
+define void @leaf_r3() {
+entry:
+ tail call void asm sideeffect "", "~{r3}"()
+ ret void
+}
+
+define void @leaf_r4() {
+entry:
+ tail call void asm sideeffect "", "~{r4}"()
+ ret void
+}
+
+define void @leaf_s0() {
+entry:
+ tail call void asm sideeffect "", "~{s0}"()
+ ret void
+}
+
+define void @leaf_d0() {
+entry:
+ tail call void asm sideeffect "", "~{d0}"()
+ ret void
+}
+
+; r3 is normally caller-saved, but with IPRA we can see that it isn't used in
+; the callee, so we can leave a live value in it.
+define void @test_r3_preserved() {
+; CHECK-LABEL: test_r3_preserved:
+entry:
+; CHECK: ASM1: r3
+; DISABLED: mov [[TEMP:r[0-9]+]], r3
+; ENABLED-NOT: r3
+; CHECK: bl leaf
+; DISABLED: mov r3, [[TEMP]]
+; ENABLED-NOT: r3
+; CHECK: ASM2: r3
+ %a = tail call i32 asm sideeffect "// ASM1: $0", "={r3},0"(i32 undef)
+ tail call void @leaf()
+ %b = tail call i32 asm sideeffect "// ASM2: $0", "={r3},0"(i32 %a)
+ ret void
+}
+
+; Same as above, but r3 is clobbered in the callee, so it is clobbered by the
+; call as normal.
+define void @test_r3_clobbered() {
+; CHECK-LABEL: test_r3_clobbered:
+entry:
+; CHECK: ASM1: r3
+; CHECK: mov [[TEMP:r[0-9]+]], r3
+; CHECK: bl leaf
+; CHECK: mov r3, [[TEMP]]
+; CHECK: ASM2: r3
+ %a = tail call i32 asm sideeffect "// ASM1: $0", "={r3},0"(i32 undef)
+ tail call void @leaf_r3()
+ %b = tail call i32 asm sideeffect "// ASM2: $0", "={r3},0"(i32 %a)
+ ret void
+}
+
+; r4 is a callee-saved register, so IPRA has no effect.
+define void @test_r4_preserved() {
+; CHECK-LABEL: test_r4_preserved:
+entry:
+; CHECK: ASM1: r4
+; CHECK-NOT: r4
+; CHECK: bl leaf
+; CHECK-NOT: r4
+; CHECK: ASM2: r4
+ %a = tail call i32 asm sideeffect "// ASM1: $0", "={r4},0"(i32 undef)
+ tail call void @leaf()
+ %b = tail call i32 asm sideeffect "// ASM2: $0", "={r4},0"(i32 %a)
+ ret void
+}
+
+define void @test_r4_clobbered() {
+; CHECK-LABEL: test_r4_clobbered:
+entry:
+; CHECK: ASM1: r4
+; CHECK-NOT: r4
+; CHECK: bl leaf_r4
+; CHECK-NOT: r4
+; CHECK: ASM2: r4
+ %a = tail call i32 asm sideeffect "// ASM1: $0", "={r4},0"(i32 undef)
+ tail call void @leaf_r4()
+ %b = tail call i32 asm sideeffect "// ASM2: $0", "={r4},0"(i32 %a)
+ ret void
+}
+
+; r12 is the intra-call scratch register, so we have to assume it is clobbered
+; even if we can see that the callee does not touch it.
+define void @test_r12() {
+; CHECK-LABEL: test_r12:
+entry:
+; CHECK: ASM1: r12
+; CHECK: mov [[TEMP:r[0-9]+]], r12
+; CHECK: bl leaf
+; CHECK: mov r12, [[TEMP]]
+; CHECK: ASM2: r12
+ %a = tail call i32 asm sideeffect "// ASM1: $0", "={r12},0"(i32 undef)
+ tail call void @leaf()
+ %b = tail call i32 asm sideeffect "// ASM2: $0", "={r12},0"(i32 %a)
+ ret void
+}
+
+; s0 and d0 are caller-saved, but IPRA allows us to keep them live in the
+; caller if the callee doesn't modify them.
+define void @test_s0_preserved() {
+; CHECK-LABEL: test_s0_preserved:
+entry:
+; CHECK: ASM1: s0
+; DISABLED: vmov.f32 [[TEMP:s[0-9]+]], s0
+; ENABLED-NOT: s0
+; CHECK: bl leaf
+; DISABLED: vmov.f32 s0, [[TEMP]]
+; ENABLED-NOT: s0
+; CHECK: ASM2: s0
+ %a = tail call float asm sideeffect "// ASM1: $0", "={s0},0"(float undef)
+ tail call void @leaf()
+ %b = tail call float asm sideeffect "// ASM2: $0", "={s0},0"(float %a)
+ ret void
+}
+
+define void @test_s0_clobbered() {
+; CHECK-LABEL: test_s0_clobbered:
+entry:
+; CHECK: ASM1: s0
+; CHECK: vmov.f32 [[TEMP:s[0-9]+]], s0
+; CHECK: bl leaf_s0
+; CHECK: vmov.f32 s0, [[TEMP]]
+; CHECK: ASM2: s0
+ %a = tail call float asm sideeffect "// ASM1: $0", "={s0},0"(float undef)
+ tail call void @leaf_s0()
+ %b = tail call float asm sideeffect "// ASM2: $0", "={s0},0"(float %a)
+ ret void
+}
+
+define void @test_d0_preserved() {
+; CHECK-LABEL: test_d0_preserved:
+entry:
+; CHECK: ASM1: d0
+; DISABLED: vmov.f64 [[TEMP:d[0-9]+]], d0
+; ENABLED-NOT: d0
+; CHECK: bl leaf
+; DISABLED: vmov.f64 d0, [[TEMP]]
+; ENABLED-NOT: d0
+; CHECK: ASM2: d0
+ %a = tail call double asm sideeffect "// ASM1: $0", "={d0},0"(double undef)
+ tail call void @leaf()
+ %b = tail call double asm sideeffect "// ASM2: $0", "={d0},0"(double %a)
+ ret void
+}
+
+define void @test_d0_clobbered() {
+; CHECK-LABEL: test_d0_clobbered:
+entry:
+; CHECK: ASM1: d0
+; CHECK: vmov.f64 [[TEMP:d[0-9]+]], d0
+; CHECK: bl leaf_d0
+; CHECK: vmov.f64 d0, [[TEMP]]
+; CHECK: ASM2: d0
+ %a = tail call double asm sideeffect "// ASM1: $0", "={d0},0"(double undef)
+ tail call void @leaf_d0()
+ %b = tail call double asm sideeffect "// ASM2: $0", "={d0},0"(double %a)
+ ret void
+}
+
+; s0 and d0 overlap, so clobbering one in the callee prevents the other from
+; being kept live across the call.
+define void @test_s0_clobber_d0() {
+; CHECK-LABEL: test_s0_clobber_d0:
+entry:
+; CHECK: ASM1: s0
+; CHECK: vmov.f32 [[TEMP:s[0-9]+]], s0
+; CHECK: bl leaf_d0
+; CHECK: vmov.f32 s0, [[TEMP]]
+; CHECK: ASM2: s0
+ %a = tail call float asm sideeffect "// ASM1: $0", "={s0},0"(float undef)
+ tail call void @leaf_d0()
+ %b = tail call float asm sideeffect "// ASM2: $0", "={s0},0"(float %a)
+ ret void
+}
+
+define void @test_d0_clobber_s0() {
+; CHECK-LABEL: test_d0_clobber_s0:
+entry:
+; CHECK: ASM1: d0
+; CHECK: vmov.f64 [[TEMP:d[0-9]+]], d0
+; CHECK: bl leaf_s0
+; CHECK: vmov.f64 d0, [[TEMP]]
+; CHECK: ASM2: d0
+ %a = tail call double asm sideeffect "// ASM1: $0", "={d0},0"(double undef)
+ tail call void @leaf_s0()
+ %b = tail call double asm sideeffect "// ASM2: $0", "={d0},0"(double %a)
+ ret void
+}