[AArch64] Optimize floating point materialization

This patch follows some ideas from r352866 to optimize the floating point materialization even further. It changes isFPImmLegal to consider up to 2 mov instructions, or up to 5 in case the subtarget has fused literals. The rationale is that the cost is the same for mov+fmov vs. adrp+ldr, but the mov+fmov sequence is always better because of the reduced d-cache pressure. The timings are still the same for movw+movk+fmov vs. adrp+ldr, because the movw+movk pair will be fused (although the sequence is one instruction longer). Reviewers: efriedma Differential Revision: https://reviews.llvm.org/D58460 llvm-svn: 356390
author: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-03-18 18:45:57 +0000
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-03-18 18:45:57 +0000
commit: a3cefa5d6492a4ba593b74b1aa615f00a5b6166d (patch)
tree: d8376cb3cd79b6fb89b4421c0936a5d3337a29ac /llvm/lib
parent: 664c1ef52849623ef509e34968e0807a21b7bf15 (diff)
download: bcm5719-llvm-a3cefa5d6492a4ba593b74b1aa615f00a5b6166d.tar.gz
bcm5719-llvm-a3cefa5d6492a4ba593b74b1aa615f00a5b6166d.zip
1 files changed, 13 insertions, 3 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 108fd8c8a49..3d72949f1c6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64ISelLowering.h"
 #include "AArch64CallingConvention.h"
 #include "AArch64MachineFunctionInfo.h"
@@ -5424,9 +5425,18 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   // If we can not materialize in immediate field for fmov, check if the
   // value can be encoded as the immediate operand of a logical instruction.
   // The immediate value will be created with either MOVZ, MOVN, or ORR.
-  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32))
-    IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(),
-                                            VT.getSizeInBits());
+  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
+    // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
+    // however the mov+fmov sequence is always better because of the reduced
+    // cache pressure. The timings are still the same if you consider
+    // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
+    // movw+movk is fused). So we limit up to 2 instructions at most.
+    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+    AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
+			      Insn);
+    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
+    IsLegal = Insn.size() <= Limit;
+  }
 
   LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
                     << " imm value: "; Imm.dump(););
author	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-03-18 18:45:57 +0000
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-03-18 18:45:57 +0000
commit	a3cefa5d6492a4ba593b74b1aa615f00a5b6166d (patch)
tree	d8376cb3cd79b6fb89b4421c0936a5d3337a29ac /llvm/lib
parent	664c1ef52849623ef509e34968e0807a21b7bf15 (diff)
download	bcm5719-llvm-a3cefa5d6492a4ba593b74b1aa615f00a5b6166d.tar.gz bcm5719-llvm-a3cefa5d6492a4ba593b74b1aa615f00a5b6166d.zip