summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonas Paulsson <paulsson@linux.vnet.ibm.com>2018-05-25 08:42:02 +0000
committerJonas Paulsson <paulsson@linux.vnet.ibm.com>2018-05-25 08:42:02 +0000
commit7d484fae2be8cbaafb1c3b42b659089df5a77d9c (patch)
tree1accc905e7405b50d84c4df084e530ae088a58fe
parentad8b7c1190b963e9df1757fbb28f14f2f34ec9a2 (diff)
downloadbcm5719-llvm-7d484fae2be8cbaafb1c3b42b659089df5a77d9c.tar.gz
bcm5719-llvm-7d484fae2be8cbaafb1c3b42b659089df5a77d9c.zip
[RegUsageInfoCollector] Bugfix for callee saved registers.
Previously, this pass would look at the (static) set returned by getCallPreservedMask() and add those back as preserved in the case when isSafeForNoCSROpt() returns false. A problem is that a target may have to save some registers even when NoCSROpt takes place. For instance, on SystemZ, the return register is needed upon return from a function. Furthermore, getCallPreservedMask() only includes the registers that the target actually wishes to emit save/restore instructions for. This means that subregs and (fully saved) superregs are missing. This patch instead takes the (dynamic) set returned by the target for the function from determineCalleeSaves() and then adds sub/super regs to build the set to be used when building the RegMask for the function. Review: Quentin Colombet, Ulrich Weigand https://reviews.llvm.org/D46315 llvm-svn: 333261
-rw-r--r--llvm/lib/CodeGen/RegUsageInfoCollector.cpp70
-rw-r--r--llvm/test/CodeGen/SystemZ/ipra-04.ll34
2 files changed, 93 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 6a0f26b346c..1bf205d5e60 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -56,6 +56,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
+ // Call determineCalleeSaves and then also set the bits for subregs and
+ // fully saved superregs.
+ static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
+
static char ID;
};
} // end of anonymous namespace
@@ -103,6 +107,9 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
+ BitVector SavedRegs;
+ computeCalleeSavedRegs(SavedRegs, MF);
+
const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
@@ -110,11 +117,15 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // Don't count registers that are saved and restored.
+ if (SavedRegs.test(PReg))
+ continue;
// If a register is defined by an instruction mark it as defined together
- // with all it's aliases.
+ // with all its unsaved aliases.
if (!MRI->def_empty(PReg)) {
for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
- SetRegAsDefined(*AI);
+ if (!SavedRegs.test(*AI))
+ SetRegAsDefined(*AI);
continue;
}
// If a register is in the UsedPhysRegsMask set then mark it as defined.
@@ -124,15 +135,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
SetRegAsDefined(PReg);
}
- if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
- const uint32_t *CallPreservedMask =
- TRI->getCallPreservedMask(MF, F.getCallingConv());
- if (CallPreservedMask) {
- // Set callee saved register as preserved.
- for (unsigned i = 0; i < RegMaskSize; ++i)
- RegMask[i] = RegMask[i] | CallPreservedMask[i];
- }
- } else {
+ if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
++NumCSROpt;
LLVM_DEBUG(dbgs() << MF.getName()
<< " function optimized for not having CSR.\n");
@@ -148,3 +151,48 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+
+void RegUsageInfoCollector::
+computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Target will return the set of registers that it saves/restores as needed.
+ SavedRegs.clear();
+ TFI->determineCalleeSaves(MF, SavedRegs);
+
+ // Insert the subregs of each callee-saved register that is saved.
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ for (MCSubRegIterator SR(Reg, TRI, false); SR.isValid(); ++SR)
+ SavedRegs.set(*SR);
+ }
+
+ // Insert any register fully saved via subregisters.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // Check if PReg is fully covered by its subregs.
+ bool CoveredBySubRegs = false;
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ if (RC->CoveredBySubRegs && RC->contains(PReg)) {
+ CoveredBySubRegs = true;
+ break;
+ }
+ if (!CoveredBySubRegs)
+ continue;
+
+ // Add PReg to SavedRegs if all subregs are saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, TRI, false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
+}
diff --git a/llvm/test/CodeGen/SystemZ/ipra-04.ll b/llvm/test/CodeGen/SystemZ/ipra-04.ll
new file mode 100644
index 00000000000..516d8595278
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/ipra-04.ll
@@ -0,0 +1,34 @@
+; Test that the updated regmask on the call to @fun1 preserves %r14 and
+; %r15. @fun1 will save and restore these registers since it contains a call.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -enable-ipra -print-regmask-num-regs=-1 \
+; RUN: -debug-only=ip-regalloc 2>&1 < %s | FileCheck --check-prefix=DBG %s
+; REQUIRES: asserts
+;
+; DBG: fun1 function optimized for not having CSR
+; DBG: Call Instruction After Register Usage Info Propagation : CallBRASL @fun1{{.*}} $r14d $r15d
+
+declare dso_local fastcc signext i32 @foo(i16*, i32 signext) unnamed_addr
+
+define internal fastcc void @fun1(i16* %arg, i16* nocapture %arg1) unnamed_addr #0 {
+bb:
+ %tmp = load i16, i16* undef, align 2
+ %tmp2 = shl i16 %tmp, 4
+ %tmp3 = tail call fastcc signext i32 @foo(i16* nonnull %arg, i32 signext 5)
+ %tmp4 = or i16 0, %tmp2
+ %tmp5 = or i16 %tmp4, 0
+ store i16 %tmp5, i16* undef, align 2
+ %tmp6 = getelementptr inbounds i16, i16* %arg, i64 5
+ %tmp7 = load i16, i16* %tmp6, align 2
+ store i16 %tmp7, i16* %arg1, align 2
+ ret void
+}
+
+define fastcc void @fun0(i8* nocapture readonly %arg, i16* nocapture %arg1, i32 signext %arg2) unnamed_addr {
+bb:
+ %a = alloca i8, i64 undef
+ call fastcc void @fun1(i16* nonnull undef, i16* %arg1)
+ ret void
+}
+
+attributes #0 = { norecurse nounwind "no-frame-pointer-elim"="false" }
OpenPOWER on IntegriCloud