summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/CodeGen/LiveInterval.h27
-rw-r--r--llvm/lib/CodeGen/LiveInterval.cpp19
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp12
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h3
-rw-r--r--llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir81
6 files changed, 136 insertions, 11 deletions
diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index 290a2381d9c..fe5adb59dac 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -836,10 +836,35 @@ namespace llvm {
/// don't defne the related lane masks after they get shrunk. E.g.,
/// when L000F gets split into L0007 and L0008 maybe only a subset
/// of the VNIs that defined L000F defines L0007.
+ ///
+ /// The clean up of the VNIs need to look at the actual instructions
+ /// to decide what is or is not live at a definition point. If the
+ /// update of the subranges occurs while the IR does not reflect these
+ /// changes, \p ComposeSubRegIdx can be used to specify how the
+ /// definition are going to be rewritten.
+ /// E.g., let say we want to merge:
+ /// V1.sub1:<2 x s32> = COPY V2.sub3:<4 x s32>
+ /// We do that by choosing a class where sub1:<2 x s32> and sub3:<4 x s32>
+ /// overlap, i.e., by choosing a class where we can find "offset + 1 == 3".
+ /// Put differently we align V2's sub3 with V1's sub1:
+ /// V2: sub0 sub1 sub2 sub3
+ /// V1: <offset> sub0 sub1
+ ///
+ /// This offset will look like a composed subregidx in the the class:
+ /// V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ /// => V1.(composed sub2 with sub1):<4 x s32> = COPY V2.sub3:<4 x s32>
+ ///
+ /// Now if we didn't rewrite the uses and def of V1, all the checks for V1
+ /// need to account for this offset.
+ /// This happens during coalescing where we update the live-ranges while
+ /// still having the old IR around because updating the IR on-the-fly
+ /// would actually clobber some information on how the live-ranges that
+ /// are being updated look like.
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI);
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx = 0);
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 54ac46f2e7c..930dc116205 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -883,7 +883,8 @@ void LiveInterval::clearSubRanges() {
static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
LaneBitmask LaneMask,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
// Phys reg should not be tracked at subreg level.
// Same for noreg (Reg == 0).
if (!Register::isVirtualRegister(Reg) || !Reg)
@@ -905,7 +906,12 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
continue;
if (MOI->getReg() != Reg)
continue;
- if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg());
+ LaneBitmask ExpectedDefMask =
+ ComposeSubRegIdx
+ ? TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask)
+ : OrigMask;
+ if ((ExpectedDefMask & LaneMask).none())
continue;
hasDef = true;
break;
@@ -924,7 +930,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
void LiveInterval::refineSubRanges(
BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
- const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -944,8 +951,10 @@ void LiveInterval::refineSubRanges(
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
// Now that the subrange is split in half, make sure we
// only keep in the subranges the VNIs that touch the related half.
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
- stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
+ stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI,
+ ComposeSubRegIdx);
+ stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI,
+ ComposeSubRegIdx);
}
Apply(*MatchingRange);
ToApply &= ~Matching;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c44a302c499..e25f0638d68 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -225,7 +225,8 @@ namespace {
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
- LaneBitmask LaneMask, CoalescerPair &CP);
+ LaneBitmask LaneMask, CoalescerPair &CP,
+ unsigned DstIdx);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
@@ -3271,7 +3272,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
LaneBitmask LaneMask,
- CoalescerPair &CP) {
+ CoalescerPair &CP,
+ unsigned ComposeSubRegIdx) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
LI.refineSubRanges(
Allocator, LaneMask,
@@ -3284,7 +3286,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
},
- *LIS->getSlotIndexes(), *TRI);
+ *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
}
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
@@ -3350,12 +3352,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!RHS.hasSubRanges()) {
LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
}
}
LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index c9316a71bdf..eb4d39b01cb 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -72,6 +72,9 @@ static cl::opt<bool>
ForceFastISel("arm-force-fast-isel",
cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableSubRegLiveness("arm-enable-subreg-liveness",
+ cl::init(false), cl::Hidden);
+
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -379,6 +382,8 @@ bool ARMSubtarget::enableMachineScheduler() const {
return useMachineScheduler();
}
+bool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }
+
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
if (enableMachineScheduler())
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 8478665979f..f582a92f656 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -806,6 +806,9 @@ public:
/// True for some subtargets at > -O0.
bool enablePostRAMachineScheduler() const override;
+ /// Check whether this subtarget wants to use subregister liveness.
+ bool enableSubRegLiveness() const override;
+
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool useAA() const override { return true; }
diff --git a/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir
new file mode 100644
index 00000000000..ef1bfa652ec
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -start-before simple-register-coalescing -mtriple=arm-apple-ios -stop-after machine-scheduler -o - -arm-enable-subreg-liveness -verify-machineinstrs | FileCheck %s
+
+# Check that when we merge live-ranges that imply offseting
+# the definition of a subregister by some other subreg index,
+# we take that new index into account while updating the subrange.
+#
+# For this specific test case, the coalescer is going to get rid
+# of `%5.dsub_1:dtriple = COPY %4.dsub_3` by aligning
+# %5.dsub_1:<3 x s64> with %4.dsub_3:<4 x s64>.
+# This is done by moving to a bigger register class <5 x s64>
+# and offseting %5 definitions with a new subregidx:
+# NewVar: <5 x s64> dsub_0 dsub_1 dsub_2 dsub_3 dsub_4
+# %4: <4 x s64> dsub_0 dsub_1 dsub_2 dsub_3
+# %5: <3 x s64> <==offset===> dsub_0 dsub_1 dsub_2
+#
+# In other %5.dsub_0 needs to be mapped to NewVar.dsub_2, %5.dsub_1
+# to NewVar.dsub_3 and so on. So essentially we are offseting %5 by
+# dsub_2.
+#
+# When updating the live-ranges, the register coalescer actually
+# has not rewritten the original code, so we need to fake the
+# rewrite to do that update.
+# This used to be wrong and this test was failling with a machine
+# verifier error: No live segment at def.
+#
+# The test case runs through the coalescer *and* the scheduler, just
+# to force the live intervals to be carried around so that the verifier
+# gets a chance to verify those. If we were to just run the coalescer,
+# the live intervals would be dropped before running the verifier since
+# no other pass would need that analysis around.
+#
+# Note: The test case looks slightly more complicated than just the
+# offseting part. That's because the bug needs three things to
+# trigger:
+# 1. Overlapping subreg lanes: here, dsub0 == <ssub0, ssub1>
+# 2. Tuple registers with a possibility to coalesce the subreg index:
+# here, what we explain with %5.dsub_1 == %4.dsub_3
+# 3. Subreg liveness enabled.
+# #1 is required to trigger the splitting of subranges that implies
+# looking at the IR to decide what is alive and what is not.
+# #2 is what produces the IR to be out-of-synce with what the reg coalescer
+# maintains for the live-ranges information.
+# #3 is, well, the problem has to do with subranges updates!
+#
+# In the end, the expected result is to have all the variables
+# being coalesced in one big (qqqq) variable.
+---
+name: main
+alignment: 1
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d2, $s1, $d4
+
+
+ ; CHECK-LABEL: name: main
+ ; CHECK: liveins: $d2, $s1, $d4
+ ; CHECK: undef %4.dsub_0:qqqqpr_with_ssub_4 = COPY $d4
+ ; CHECK: %4.ssub_4:qqqqpr_with_ssub_4 = COPY $s1
+ ; CHECK: %4.dsub_1:qqqqpr_with_ssub_4 = COPY $d2
+ ; CHECK: %4.dsub_3:qqqqpr_with_ssub_4 = COPY %4.dsub_1
+ ; CHECK: KILL implicit-def %4.dsub_2, implicit %4.qqsub_0
+ ; CHECK: %4.dsub_4:qqqqpr_with_ssub_4 = COPY %4.dsub_1
+ ; CHECK: tBX_RET 14, $noreg, implicit %4.ssub_4_ssub_5_ssub_6_ssub_7_ssub_8_ssub_9
+ %3:dpr_vfp2 = COPY $d4
+ undef %0.ssub_0:dpr_vfp2 = COPY $s1
+ %1:dpr_vfp2 = COPY $d2
+ undef %4.dsub_0:dquad = COPY %3
+ %4.dsub_1:dquad = COPY %1
+ %4.dsub_2:dquad = COPY %0
+ %4.dsub_3:dquad = COPY %1
+ KILL implicit-def undef %5.dsub_0:dtriple, implicit %4
+ %5.dsub_1:dtriple = COPY %4.dsub_3
+ %5.dsub_2:dtriple = COPY %1
+ tBX_RET 14, $noreg, implicit %5
+
+...
OpenPOWER on IntegriCloud