summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-02-23 20:19:44 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-02-23 20:19:44 +0000
commitce3ddd2de4c5dbd5a7a68b51ea38f96cf7fbf3aa (patch)
tree05d2e6754a3580a96a723b01dcd89dcf58eec2bb
parent851125dca903fdaa458d87fb0a35349286a2395f (diff)
downloadbcm5719-llvm-ce3ddd2de4c5dbd5a7a68b51ea38f96cf7fbf3aa.tar.gz
bcm5719-llvm-ce3ddd2de4c5dbd5a7a68b51ea38f96cf7fbf3aa.zip
Correct register pressure calculation in presence of subregs
If a subreg is used in an instruction, it currently counts as a whole superreg for the purpose of register pressure calculation. This patch corrects the improper register pressure calculation by examining the operand's lane mask. Differential Revision: https://reviews.llvm.org/D29835 llvm-svn: 296009
-rw-r--r--llvm/include/llvm/CodeGen/RegisterPressure.h2
-rw-r--r--llvm/include/llvm/Target/TargetRegisterInfo.h7
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp4
-rw-r--r--llvm/lib/CodeGen/RegisterPressure.cpp42
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h4
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i32.ll32
-rw-r--r--llvm/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir67
9 files changed, 153 insertions, 30 deletions
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index a3ea41d5236..23fab7b0684 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -156,7 +156,7 @@ public:
const_iterator begin() const { return &PressureChanges[0]; }
const_iterator end() const { return &PressureChanges[MaxPSets]; }
- void addPressureChange(unsigned RegUnit, bool IsDec,
+ void addPressureChange(RegisterMaskPair P, bool IsDec,
const MachineRegisterInfo *MRI);
void dump(const TargetRegisterInfo &TRI) const;
diff --git a/llvm/include/llvm/Target/TargetRegisterInfo.h b/llvm/include/llvm/Target/TargetRegisterInfo.h
index 673cca7f44a..3ee5685123c 100644
--- a/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -30,6 +30,7 @@ namespace llvm {
class BitVector;
class MachineFunction;
+class MachineRegisterInfo;
class RegScavenger;
template<class T> class SmallVectorImpl;
class VirtRegMap;
@@ -719,6 +720,12 @@ public:
/// Get the weight in units of pressure for this register unit.
virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0;
+ /// Get the weight in units of pressure for a sub register of this register
+ /// unit given a lane mask.
+ virtual unsigned getRegUnitWeight(const MachineRegisterInfo &MRI,
+ unsigned RegUnit,
+ LaneBitmask LaneMask) const;
+
/// Get the number of dimensions of register pressure.
virtual unsigned getNumRegPressureSets() const = 0;
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 79b02c7de8c..066398358e8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1085,7 +1085,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
continue;
PressureDiff &PDiff = getPressureDiff(&SU);
- PDiff.addPressureChange(Reg, Decrement, &MRI);
+ PDiff.addPressureChange(P, Decrement, &MRI);
DEBUG(
dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
<< PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
@@ -1123,7 +1123,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
if (LRQ.valueIn() == VNI) {
PressureDiff &PDiff = getPressureDiff(SU);
- PDiff.addPressureChange(Reg, true, &MRI);
+ PDiff.addPressureChange(P, true, &MRI);
DEBUG(
dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
<< *SU->getInstr();
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index c726edc88b4..b22b36a3607 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -46,16 +46,29 @@
using namespace llvm;
+/// Clamp lane masks to the maximum possible value.
+static void clampMasks(const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask& LaneMask1, LaneBitmask& LaneMask2) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask Max = MRI.getMaxLaneMaskForVReg(Reg);
+ LaneMask1 &= Max;
+ LaneMask2 &= Max;
+ }
+}
+
/// Increase pressure for each pressure set provided by TargetRegisterInfo.
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
assert((PrevMask & ~NewMask).none() && "Must not remove bits");
- if (PrevMask.any() || NewMask.none())
+
+ clampMasks(MRI, Reg, PrevMask, NewMask);
+ if ((NewMask & ~PrevMask).none())
return;
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, NewMask & ~PrevMask);
PSetIterator PSetI = MRI.getPressureSets(Reg);
- unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI)
CurrSetPressure[*PSetI] += Weight;
}
@@ -65,11 +78,13 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
//assert((NewMask & !PrevMask) == 0 && "Must not add bits");
- if (NewMask.any() || PrevMask.none())
+ clampMasks(MRI, Reg, PrevMask, NewMask);
+ if ((~NewMask & PrevMask).none())
return;
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned Weight = TRI->getRegUnitWeight(MRI, Reg, ~NewMask & PrevMask);
PSetIterator PSetI = MRI.getPressureSets(Reg);
- unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
CurrSetPressure[*PSetI] -= Weight;
@@ -139,11 +154,14 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
- if (PreviousMask.any() || NewMask.none())
+ clampMasks(*MRI, RegUnit, PreviousMask, NewMask);
+ if ((NewMask & ~PreviousMask).none())
return;
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ unsigned Weight = TRI->getRegUnitWeight(*MRI, RegUnit,
+ NewMask & ~PreviousMask);
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
- unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
P.MaxSetPressure[*PSetI] =
@@ -644,17 +662,19 @@ void PressureDiffs::addInstruction(unsigned Idx,
PressureDiff &PDiff = (*this)[Idx];
assert(!PDiff.begin()->isValid() && "stale PDiff");
for (const RegisterMaskPair &P : RegOpers.Defs)
- PDiff.addPressureChange(P.RegUnit, true, &MRI);
+ PDiff.addPressureChange(P, true, &MRI);
for (const RegisterMaskPair &P : RegOpers.Uses)
- PDiff.addPressureChange(P.RegUnit, false, &MRI);
+ PDiff.addPressureChange(P, false, &MRI);
}
/// Add a change in pressure to the pressure diff of a given instruction.
-void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+void PressureDiff::addPressureChange(RegisterMaskPair P, bool IsDec,
const MachineRegisterInfo *MRI) {
- PSetIterator PSetI = MRI->getPressureSets(RegUnit);
- int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ int Weight = (int)TRI->getRegUnitWeight(*MRI, P.RegUnit, P.LaneMask);
+ PSetIterator PSetI = MRI->getPressureSets(P.RegUnit);
+ if (IsDec) Weight = -Weight;
for (; PSetI.isValid(); ++PSetI) {
// Find an existing entry in the pressure diff for this PSet.
PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 66cdad278e8..0d3f8eb6fb5 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -412,6 +412,15 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
return true;
}
+/// Get the weight in units of pressure for a sub register of this register
+/// unit given a lane mask.
+unsigned TargetRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI,
+ unsigned RegUnit,
+ LaneBitmask LaneMask) const {
+ PSetIterator PSetI = MRI.getPressureSets(RegUnit);
+ return PSetI.getWeight();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 39324cbbcc0..f1d037d1159 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -1408,3 +1409,18 @@ const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
return Empty;
return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
}
+
+unsigned SIRegisterInfo::getRegUnitWeight(const MachineRegisterInfo &MRI,
+ unsigned RegUnit,
+ LaneBitmask LaneMask) const {
+ unsigned Weight = TargetRegisterInfo::getRegUnitWeight(MRI, RegUnit,
+ LaneMask);
+ if (Weight > 1 && LaneMask.any() && !LaneMask.all() &&
+ isVirtualRegister(RegUnit)) {
+ LaneBitmask Max = MRI.getMaxLaneMaskForVReg(RegUnit);
+ if (Max != LaneMask && !Max.all() && !Max.none())
+ Weight = (Weight * countPopulation(LaneMask.getAsInteger())) /
+ countPopulation(Max.getAsInteger());
+ }
+ return Weight;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 679ed229758..844f0317a1e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -228,6 +228,10 @@ public:
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
+ unsigned getRegUnitWeight(const MachineRegisterInfo &MRI,
+ unsigned RegUnit,
+ LaneBitmask LaneMask) const override;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
index e3335347a63..b606b2555ca 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
@@ -424,25 +424,25 @@ define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
-; GCN-HSA: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
%ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir
new file mode 100644
index 00000000000..0beb7b76485
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-subregs.mir
@@ -0,0 +1,67 @@
+# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler -verify-misched %s -o - -debug-only=misched 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# CHECK-LABEL: ScheduleDAGMILive::schedule starting
+
+# Check that def and use subregs count with the same weight
+# CHECK: %vreg9:sub1<def> = V_MUL_LO_I32 %vreg6:sub1, 3
+# CHECK: Pressure Diff : {{$}}
+
+# Check that a subreg does not count as a whole superreg
+# CHECK: %vreg9:sub0<def> = V_MUL_LO_I32 %vreg6:sub0, %vreg9:sub1
+# CHECK: Pressure Diff : VGPR_32 1{{$}}
+
+# Check that two subregs of the same register count as a whole register
+# CHECK: DS_WRITE2_B32 %vreg7, %vreg9:sub0, %vreg9:sub1
+# CHECK: Pressure Diff : VGPR_32 3{{$}}
+
+---
+name: mo_pset
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_128 }
+ - { id: 1, class: sgpr_64 }
+ - { id: 2, class: sreg_32_xm0 }
+ - { id: 3, class: sgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sreg_32_xm0_xexec }
+ - { id: 6, class: vreg_64 }
+ - { id: 7, class: vgpr_32 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vreg_64 }
+liveins:
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0:
+ liveins: %sgpr4_sgpr5
+
+ %1 = COPY %sgpr4_sgpr5
+ %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+ %m0 = S_MOV_B32 -1
+ %7 = COPY %5
+ %6 = DS_READ2_B32 %7, 0, 1, 0, implicit %m0, implicit %exec
+ undef %9.sub1 = V_MUL_LO_I32 %6.sub1, 3, implicit %exec
+ %9.sub0 = V_MUL_LO_I32 %6.sub0, %9.sub1, implicit %exec
+ DS_WRITE2_B32 %7, %9.sub0, %9.sub1, 4, 5, 0, implicit killed %m0, implicit %exec
+ S_ENDPGM
+
+...
OpenPOWER on IntegriCloud