diff options
author | Balaram Makam <bmakam@codeaurora.org> | 2017-03-31 18:16:53 +0000 |
---|---|---|
committer | Balaram Makam <bmakam@codeaurora.org> | 2017-03-31 18:16:53 +0000 |
commit | 2aba753e843271a0cae9702a70793c05f64e04de (patch) | |
tree | de4e5d969df65f933ef21a66f4e286cd6f4be041 /llvm/lib/Target | |
parent | 48d1427c30b73b7ceef154100c774f753c600e31 (diff) | |
download | bcm5719-llvm-2aba753e843271a0cae9702a70793c05f64e04de.tar.gz bcm5719-llvm-2aba753e843271a0cae9702a70793c05f64e04de.zip |
[AArch64] Add new subtarget feature to fold LSL into address mode.
Summary:
This feature enables folding of logical shift operations of up to 3 places into the addressing mode on Kryo and Falkor, which have a fastpath LSL.
Reviewers: mcrosier, rengolin, t.p.northover
Subscribers: junbuml, gberry, llvm-commits, aemerson
Differential Revision: https://reviews.llvm.org/D31113
llvm-svn: 299240
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 47 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 |
3 files changed, 53 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 09897104f32..84473fd72ad 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -126,6 +126,9 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "equivalent when the immediate does " "not fit in the encoding.">; +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; //===----------------------------------------------------------------------===// // Architectures. // @@ -279,7 +282,8 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", FeaturePerfMon, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, - FeatureZCZeroing + FeatureZCZeroing, + FeatureLSLFast ]>; def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", @@ -293,7 +297,8 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureRDM, - FeatureZCZeroing + FeatureZCZeroing, + FeatureLSLFast ]>; def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 3099383e5b3..ae01ea477bb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -328,11 +328,52 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { } } +/// \brief Determine whether it is worth it to fold SHL into the addressing +/// mode. +static bool isWorthFoldingSHL(SDValue V) { + assert(V.getOpcode() == ISD::SHL && "invalid opcode"); + // It is worth folding logical shift of up to three places. 
+ auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); + if (!CSD) + return false; + unsigned ShiftVal = CSD->getZExtValue(); + if (ShiftVal > 3) + return false; + + // Check if this particular node is reused in any non-memory related + // operation. If yes, do not try to fold this node into the address + // computation, since the computation will be kept. + const SDNode *Node = V.getNode(); + for (SDNode *UI : Node->uses()) + if (!isa<MemSDNode>(*UI)) + for (SDNode *UII : UI->uses()) + if (!isa<MemSDNode>(*UII)) + return false; + return true; +} + /// \brief Determine whether it is worth to fold V into an extended register. bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { - // it hurts if the value is used at least twice, unless we are optimizing - // for code size. - return ForCodeSize || V.hasOneUse(); + // Trivial if we are optimizing for code size or if there is only + // one use of the value. + if (ForCodeSize || V.hasOneUse()) + return true; + // If a subtarget has a fastpath LSL we can fold a logical shift into + // the addressing mode and save a cycle. + if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && + isWorthFoldingSHL(V)) + return true; + if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { + const SDValue LHS = V.getOperand(0); + const SDValue RHS = V.getOperand(1); + if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) + return true; + if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) + return true; + } + + // It hurts otherwise, since the value will be reused. + return false; } /// SelectShiftedRegister - Select a "shifted register" operand. 
If the value diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 10377cbbb16..0ef8949b762 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -69,6 +69,7 @@ protected: bool HasPerfMon = false; bool HasFullFP16 = false; bool HasSPE = false; + bool HasLSLFast = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove = false; @@ -232,6 +233,7 @@ public: bool hasPerfMon() const { return HasPerfMon; } bool hasFullFP16() const { return HasFullFP16; } bool hasSPE() const { return HasSPE; } + bool hasLSLFast() const { return HasLSLFast; } bool isLittleEndian() const { return IsLittle; } |