summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h8
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp18
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp4
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h4
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp8
9 files changed, 36 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 33e8f23f6cd..465137f2750 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1878,8 +1878,8 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
+bool AArch64InstrInfo::shouldScheduleAdjacent(
+ const MachineInstr &First, const MachineInstr &Second) const {
if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 149469e56d7..90b2c089687 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -133,15 +133,11 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
- bool enableClusterLoads() const override { return true; }
-
- bool enableClusterStores() const override { return true; }
-
bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const override;
+ bool shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const override;
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *Var,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index cdc469c7b86..b84abc795d9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -299,6 +300,15 @@ public:
return getTM<AArch64TargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ return DAG;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index f88bb69c3a7..e4dc6599e15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -32,14 +32,6 @@ void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
: AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
-bool AMDGPUInstrInfo::enableClusterLoads() const {
- return true;
-}
-
-bool AMDGPUInstrInfo::enableClusterStores() const {
- return true;
-}
-
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into 2 16 store batches.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 46e985dc8fd..bd8e389639f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -39,9 +39,6 @@ private:
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
- bool enableClusterLoads() const override;
- bool enableClusterStores() const override;
-
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index baf4d192c57..7287b56aa6d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -102,14 +102,8 @@ static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
-
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
- if (TII->enableClusterLoads())
- DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI));
-
- if (TII->enableClusterStores())
- DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI));
-
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -291,6 +285,14 @@ public:
return getTM<AMDGPUTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+ }
+
void addEarlyCSEOrGVNPass();
void addStraightLineScalarOptimizationPasses();
void addIRPasses() override;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 60df110483d..eb1cb0c409c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8023,8 +8023,8 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
-bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
+bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const {
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index a0292bbdaed..8d746172dcb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -443,8 +443,8 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const override;
+ bool shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const override;
void getNoopForMachoTarget(MCInst &NopInst) const override;
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index b39870265d0..6eb96a5e184 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -18,6 +18,7 @@
#include "X86TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
@@ -284,6 +285,13 @@ public:
return getTM<X86TargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ return DAG;
+ }
+
void addIRPasses() override;
bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
OpenPOWER on IntegriCloud