summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorEvandro Menezes <e.menezes@samsung.com>2017-02-01 02:54:42 +0000
committerEvandro Menezes <e.menezes@samsung.com>2017-02-01 02:54:42 +0000
commit455382ea220b0d432aec8b7153b6d8384f032000 (patch)
tree05aeafdd69da2b70844b5abc3d571b0b517424ab /llvm/lib/Target
parentb21fb29c26f386be8dd44518d121219db97eda28 (diff)
downloadbcm5719-llvm-455382ea220b0d432aec8b7153b6d8384f032000.tar.gz
bcm5719-llvm-455382ea220b0d432aec8b7153b6d8384f032000.zip
[AArch64] Add new target feature to fuse literal generation
This feature enables the fusion of such operations on Cortex A57, as recommended in its Software Optimisation Guide, sections 4.14 and 4.15. Differential revision: https://reviews.llvm.org/D28698 llvm-svn: 293739
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64MacroFusion.cpp25
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp14
4 files changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 1f21da75681..deeb98b7f55 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -107,6 +107,10 @@ def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@@ -189,6 +193,7 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index f6d693262c3..7919b681dcb 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -129,6 +129,31 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
}
+ if (ST.hasFuseLiterals())
+ // Fuse literal generation operations.
+ switch (FirstOpcode) {
+ // PC relative address.
+ case AArch64::ADRP:
+ return SecondOpcode == AArch64::ADDXri ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // 32 bit immediate.
+ case AArch64::MOVZWi:
+ return (SecondOpcode == AArch64::MOVKWi &&
+ Second->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower half of 64 bit immediate.
+ case AArch64::MOVZXi:
+ return (SecondOpcode == AArch64::MOVKXi &&
+ Second->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Upper half of 64 bit immediate.
+ case AArch64::MOVKXi:
+ return First->getOperand(3).getImm() == 32 &&
+ ((SecondOpcode == AArch64::MOVKXi &&
+ Second->getOperand(3).getImm() == 48) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ }
+
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 26eac3c6349..a5b88ab2619 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -85,6 +85,7 @@ protected:
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool HasFuseAES = false;
+ bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
@@ -199,6 +200,7 @@ public:
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 63a1acab4e4..3368c984187 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -330,6 +330,20 @@ public:
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ if (ST.hasFuseLiterals()) {
+ // Run the Macro Fusion after RA again since literals are expanded from
+ // pseudos then (v. addPreSched2()).
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ return DAG;
+ }
+
+ return nullptr;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
OpenPOWER on IntegriCloud