diff options
| author | Matthias Braun <matze@braunis.de> | 2016-10-04 19:28:21 +0000 |
|---|---|---|
| committer | Matthias Braun <matze@braunis.de> | 2016-10-04 19:28:21 +0000 |
| commit | 46a5238682fc69457c70cca0e9bb950140062b7d (patch) | |
| tree | 77a796443e9fe292be22195e951528f84ea5363b /llvm/lib/Target | |
| parent | a271d1a531ecfa5c1c71f644fc4aaebfd9ae4fbd (diff) | |
| download | bcm5719-llvm-46a5238682fc69457c70cca0e9bb950140062b7d.tar.gz bcm5719-llvm-46a5238682fc69457c70cca0e9bb950140062b7d.zip | |
AArch64: Macrofusion: Split features, add missing combinations.
AArch64InstrInfo::shouldScheduleAdjacent() determines whether two
instructions can benefit from macro-op fusion on Apple CPUs. The list
of fusible instructions turned out to be incomplete:
- the "rr" variants of the instructions were missing
- even the "rs" variants can have shift value == 0 and behave like the
"rr" variants
This also splits the MacroOpFusion target feature into
ArithmeticBccFusion and ArithmeticCbzFusion.
Differential Revision: https://reviews.llvm.org/D25142
llvm-svn: 283243
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64.td | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 49 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 6 |
3 files changed, 59 insertions, 11 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 5c66748cee6..2ff3cf45a84 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -94,9 +94,13 @@ def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", "true", "Use alternative pattern for sextload convert to f32">; -def FeatureMacroOpFusion : SubtargetFeature< - "macroop-fusion", "HasMacroOpFusion", "true", - "CPU supports macro op fusion">; +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; def FeatureDisableLatencySchedHeuristic : SubtargetFeature< "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", @@ -204,7 +208,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", FeatureCrypto, FeatureDisableLatencySchedHeuristic, FeatureFPARMv8, - FeatureMacroOpFusion, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, FeatureNEON, FeaturePerfMon, FeatureSlowMisaligned128Store, @@ -244,7 +249,7 @@ def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", FeatureCRC, FeatureCrypto, FeatureFPARMv8, - FeatureMacroOpFusion, + FeatureArithmeticBccFusion, FeatureNEON, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 859f7828901..b26dbce1875 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1876,39 +1876,80 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First, MachineInstr &Second) const { 
- if (Subtarget.hasMacroOpFusion()) { + if (Subtarget.hasArithmeticBccFusion()) { // Fuse CMN, CMP, TST followed by Bcc. unsigned SecondOpcode = Second.getOpcode(); if (SecondOpcode == AArch64::Bcc) { switch (First.getOpcode()) { default: return false; - case AArch64::SUBSWri: case AArch64::ADDSWri: - case AArch64::ANDSWri: - case AArch64::SUBSXri: + case AArch64::ADDSWrr: case AArch64::ADDSXri: + case AArch64::ADDSXrr: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::SUBSWri: + case AArch64::SUBSWrr: + case AArch64::SUBSXri: + case AArch64::SUBSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: return true; + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: + case AArch64::ANDSWrs: + case AArch64::ANDSXrs: + case AArch64::SUBSWrs: + case AArch64::SUBSXrs: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + // Shift value can be 0 making these behave like the "rr" variant... + return !hasShiftedReg(Second); } } + } + if (Subtarget.hasArithmeticCbzFusion()) { // Fuse ALU operations followed by CBZ/CBNZ. 
+ unsigned SecondOpcode = Second.getOpcode(); if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { switch (First.getOpcode()) { default: return false; case AArch64::ADDWri: + case AArch64::ADDWrr: case AArch64::ADDXri: + case AArch64::ADDXrr: case AArch64::ANDWri: + case AArch64::ANDWrr: case AArch64::ANDXri: + case AArch64::ANDXrr: case AArch64::EORWri: + case AArch64::EORWrr: case AArch64::EORXri: + case AArch64::EORXrr: case AArch64::ORRWri: + case AArch64::ORRWrr: case AArch64::ORRXri: + case AArch64::ORRXrr: case AArch64::SUBWri: + case AArch64::SUBWrr: case AArch64::SUBXri: + case AArch64::SUBXrr: return true; + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ANDWrs: + case AArch64::ANDXrs: + case AArch64::SUBWrs: + case AArch64::SUBXrs: + case AArch64::BICWrs: + case AArch64::BICXrs: + // Shift value can be 0 making these behave like the "rr" variant... + return !hasShiftedReg(Second); } } } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 9f51c6be635..a21dbd8322f 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -80,7 +80,8 @@ protected: bool Misaligned128StoreIsSlow = false; bool AvoidQuadLdStPairs = false; bool UseAlternateSExtLoadCVTF32Pattern = false; - bool HasMacroOpFusion = false; + bool HasArithmeticBccFusion = false; + bool HasArithmeticCbzFusion = false; bool DisableLatencySchedHeuristic = false; uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; @@ -188,7 +189,8 @@ public: bool useAlternateSExtLoadCVTF32Pattern() const { return UseAlternateSExtLoadCVTF32Pattern; } - bool hasMacroOpFusion() const { return HasMacroOpFusion; } + bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } + bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } unsigned getMaxInterleaveFactor() const { 
return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; |

