authorJames Molloy <james.molloy@arm.com>2011-10-19 14:11:07 +0000
committerJames Molloy <james.molloy@arm.com>2011-10-19 14:11:07 +0000
commit2d768fd37999cf57a162c33a54484614ac421a52 (patch)
tree21110c3b7934f25e484f83d51d8b5fc6bae2a314
parentfba251f267838e4a725524559f1dfe672c2044bf (diff)
downloadbcm5719-llvm-2d768fd37999cf57a162c33a54484614ac421a52.tar.gz
bcm5719-llvm-2d768fd37999cf57a162c33a54484614ac421a52.zip
Use literal pool loads instead of MOVW/MOVT for materializing global addresses when optimizing for size.
On spec/gcc, this caused a codesize improvement of ~1.9% for ARM mode and ~4.9% for Thumb(2) mode. This is codesize including literal pools. The pools themselves doubled in size for ARM mode and quintupled for Thumb mode, suggesting that there is still redundancy in LLVM's use of constant pools that could be reduced by sharing entries. Fixes PR11087. llvm-svn: 142530
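To make the trade-off concrete, here is a sketch of the two sequences for materializing the address of a global x on ARMv7 in ARM mode (register names and pool labels are illustrative, not taken from actual llc output):

    @ movw/movt pair: 8 bytes of code, no pool entry, no load from memory.
    movw    r0, :lower16:x
    movt    r0, :upper16:x

    @ Literal pool load: 4 bytes of code plus a 4-byte pool entry; the
    @ entry lives in memory and can in principle be shared between users.
    ldr     r0, .LCPI0_0
    ...
    .LCPI0_0:
    .long   x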
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp              9
-rw-r--r--  llvm/test/CodeGen/ARM/2011-10-18-DisableMovtSize.ll  27
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index beae30a2a7b..34023af084a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2103,8 +2103,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
}
// If we have T2 ops, we can materialize the address directly via movt/movw
- // pair. This is always cheaper.
- if (Subtarget->useMovt()) {
+ // pair. This is always cheaper in terms of performance, but uses at least 2
+ // extra bytes.
+ if (Subtarget->useMovt() &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
@@ -2129,7 +2131,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed.
- if (Subtarget->useMovt() && RelocM != Reloc::Static) {
+ if (Subtarget->useMovt() && RelocM != Reloc::Static &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
diff --git a/llvm/test/CodeGen/ARM/2011-10-18-DisableMovtSize.ll b/llvm/test/CodeGen/ARM/2011-10-18-DisableMovtSize.ll
new file mode 100644
index 00000000000..6dae75be91c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/2011-10-18-DisableMovtSize.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-unknown-linux-eabi | FileCheck %s
+
+; Check that when optimizing for size, a literal pool load is used
+; instead of the (potentially faster) movw/movt pair when loading
+; a large constant.
+
+@x = global i32* inttoptr (i32 305419888 to i32*), align 4
+
+define i32 @f() optsize {
+ ; CHECK: f:
+ ; CHECK: ldr r{{.}}, {{.?}}LCPI{{.}}_{{.}}
+ ; CHECK: ldr r{{.}}, [{{(pc, )?}}r{{.}}]
+ ; CHECK: ldr r{{.}}, [r{{.}}]
+ %1 = load i32** @x, align 4
+ %2 = load i32* %1
+ ret i32 %2
+}
+
+define i32 @g() {
+ ; CHECK: g:
+ ; CHECK: movw
+ ; CHECK: movt
+ %1 = load i32** @x, align 4
+ %2 = load i32* %1
+ ret i32 %2
+}
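As a rough way to inspect the difference by hand (assuming an llc binary built with the ARM target; the filename is hypothetical), the test above can also be run outside of lit:

    llc disable-movt-size.ll -mtriple=armv7-unknown-linux-eabi -o -

Per the CHECK lines, f (marked optsize) should load the address of @x from a constant pool entry via ldr, while g should materialize it with a movw/movt pair.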