From c15d47bb013e975da582c8fd786ba8234d70d75d Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 13 Sep 2018 10:28:05 +0000 Subject: ARM: align loops to 4 bytes on Cortex-M3 and Cortex-M4. The Technical Reference Manuals for these two CPUs state that branching to an unaligned 32-bit instruction incurs an extra pipeline reload penalty. That's bad. This also enables the optimization at -Os since it costs on average one byte per loop in return for 1 cycle per iteration, which is pretty good going. llvm-svn: 342127 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'llvm/lib/CodeGen') diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 21350df624e..624d3365b4f 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2497,7 +2497,8 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction().optForSize()) + if (F->getFunction().optForMinSize() || + (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize())) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) -- cgit v1.2.3