summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMatthias Braun <matze@braunis.de>2016-10-04 19:28:21 +0000
committerMatthias Braun <matze@braunis.de>2016-10-04 19:28:21 +0000
commit46a5238682fc69457c70cca0e9bb950140062b7d (patch)
tree77a796443e9fe292be22195e951528f84ea5363b /llvm/lib/Target
parenta271d1a531ecfa5c1c71f644fc4aaebfd9ae4fbd (diff)
downloadbcm5719-llvm-46a5238682fc69457c70cca0e9bb950140062b7d.tar.gz
bcm5719-llvm-46a5238682fc69457c70cca0e9bb950140062b7d.zip
AArch64: Macrofusion: Split features, add missing combinations.
AArch64InstrInfo::shouldScheduleAdjacent() determines whether two instructions can benefit from macro-op fusion on Apple CPUs. The list turned out to be incomplete: - the "rr" variants of the instructions were missing; - even the "rs" variants can have a shift value of 0 and then behave like the "rr" variants. This also splits the MacroOpFusion target feature into ArithmeticBccFusion and ArithmeticCbzFusion. Differential Revision: https://reviews.llvm.org/D25142 llvm-svn: 283243
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td15
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp49
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h6
3 files changed, 59 insertions, 11 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 5c66748cee6..2ff3cf45a84 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -94,9 +94,13 @@ def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
-def FeatureMacroOpFusion : SubtargetFeature<
- "macroop-fusion", "HasMacroOpFusion", "true",
- "CPU supports macro op fusion">;
+def FeatureArithmeticBccFusion : SubtargetFeature<
+ "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+ "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+ "CPU fuses arithmetic + cbz/cbnz operations">;
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
@@ -204,7 +208,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureNEON,
FeaturePerfMon,
FeatureSlowMisaligned128Store,
@@ -244,7 +249,7 @@ def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 859f7828901..b26dbce1875 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1876,39 +1876,80 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
MachineInstr &Second) const {
- if (Subtarget.hasMacroOpFusion()) {
+ if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First.getOpcode()) {
default:
return false;
- case AArch64::SUBSWri:
case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
+ case AArch64::ADDSWrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
+ }
+ if (Subtarget.hasArithmeticCbzFusion()) {
// Fuse ALU operations followed by CBZ/CBNZ.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
+ case AArch64::ADDWrr:
case AArch64::ADDXri:
+ case AArch64::ADDXrr:
case AArch64::ANDWri:
+ case AArch64::ANDWrr:
case AArch64::ANDXri:
+ case AArch64::ANDXrr:
case AArch64::EORWri:
+ case AArch64::EORWrr:
case AArch64::EORXri:
+ case AArch64::EORXrr:
case AArch64::ORRWri:
+ case AArch64::ORRWrr:
case AArch64::ORRXri:
+ case AArch64::ORRXrr:
case AArch64::SUBWri:
+ case AArch64::SUBWrr:
case AArch64::SUBXri:
+ case AArch64::SUBXrr:
return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 9f51c6be635..a21dbd8322f 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -80,7 +80,8 @@ protected:
bool Misaligned128StoreIsSlow = false;
bool AvoidQuadLdStPairs = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
- bool HasMacroOpFusion = false;
+ bool HasArithmeticBccFusion = false;
+ bool HasArithmeticCbzFusion = false;
bool DisableLatencySchedHeuristic = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
@@ -188,7 +189,8 @@ public:
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
- bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+ bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
+ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
OpenPOWER on IntegriCloud