summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h11
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h4
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Localizer.h12
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Localizer.cpp219
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h4
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll62
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir87
8 files changed, 341 insertions, 62 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index f53b17df012..c1c92b78875 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1053,6 +1053,11 @@ public:
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+ /// \returns the size cost of rematerializing a GlobalValue address relative
+ /// to a stack reload.
+ unsigned getGISelRematGlobalCost() const;
+
/// @}
private:
@@ -1269,6 +1274,7 @@ public:
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+ virtual unsigned getGISelRematGlobalCost() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};
@@ -1701,6 +1707,11 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
+
+ unsigned getGISelRematGlobalCost() const override {
+ return Impl.getGISelRematGlobalCost();
+ }
+
int getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index f8b36ec43a3..62e9d0f1925 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -572,6 +572,10 @@ public:
return true;
}
+ unsigned getGISelRematGlobalCost() const {
+ return 1;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index cfc7c3567c5..8ab1e55195c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -27,6 +27,7 @@
namespace llvm {
// Forward declarations.
class MachineRegisterInfo;
+class TargetTransformInfo;
/// This pass implements the localization mechanism described at the
/// top of this file. One specificity of the implementation is that
@@ -43,9 +44,11 @@ private:
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
MachineRegisterInfo *MRI;
+ /// TTI used for getting remat costs for instructions.
+ TargetTransformInfo *TTI;
/// Check whether or not \p MI needs to be moved close to its uses.
- static bool shouldLocalize(const MachineInstr &MI);
+ bool shouldLocalize(const MachineInstr &MI);
/// Check if \p MOUse is used in the same basic block as \p Def.
/// If the use is in the same block, we say it is local.
@@ -57,6 +60,13 @@ private:
/// Initialize the field members using \p MF.
void init(MachineFunction &MF);
+ /// Do inter-block localization from the entry block.
+ bool localizeInterBlock(MachineFunction &MF,
+ SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
+ /// Do intra-block localization of already localized instructions.
+ bool localizeIntraBlock(SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
public:
Localizer();
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a55c1be1a09..83840aa7fbb 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -724,6 +724,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+ return TTIImpl->getGISelRematGlobalCost();
+}
+
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index e61cddf114a..9b99ec12b82 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -20,17 +21,55 @@
using namespace llvm;
char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
- "Move/duplicate certain instructions close to their use", false,
- false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
Localizer::Localizer() : MachineFunctionPass(ID) {
initializeLocalizerPass(*PassRegistry::getPassRegistry());
}
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
bool Localizer::shouldLocalize(const MachineInstr &MI) {
+ // Assuming a spill and reload of a value has a cost of 1 instruction each,
+ // this helper function computes the maximum number of uses we should consider
+ // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+ // break even in terms of code size when the original MI has 2 users vs
+ // choosing to potentially spill. Any more than 2 users we we have a net code
+ // size increase. This doesn't take into account register pressure though.
+ auto maxUses = [](unsigned RematCost) {
+ // A cost of 1 means remats are basically free.
+ if (RematCost == 1)
+ return UINT_MAX;
+ if (RematCost == 2)
+ return 2U;
+
+ // Remat is too expensive, only sink if there's one user.
+ if (RematCost > 2)
+ return 1U;
+ llvm_unreachable("Unexpected remat cost");
+ };
+
+ // Helper to walk through uses and terminate early once we've reached a limit.
+ // Saves us spending time traversing uses if all we want to know is whether
+ // there are at most MaxUses users.
+ auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+ unsigned NumUses = 0;
+ auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+ for (; UI != UE && NumUses < MaxUses; ++UI) {
+ NumUses++;
+ }
+ // If we haven't reached the end yet then there are more than MaxUses users.
+ return UI == UE;
+ };
+
switch (MI.getOpcode()) {
default:
return false;
@@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FRAME_INDEX:
return true;
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ unsigned RematCost = TTI->getGISelRematGlobalCost();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned MaxUses = maxUses(RematCost);
+ if (MaxUses == UINT_MAX)
+ return true; // Remats are "free" so always localize.
+ bool B = isUsesAtMost(Reg, MaxUses);
+ return B;
+ }
}
}
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
+bool Localizer::localizeInterBlock(
+ MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+ bool Changed = false;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+ // Since the IRTranslator only emits constants into the entry block, and the
+ // rest of the GISel pipeline generally emits constants close to their users,
+ // we only localize instructions in the entry block here. This might change if
+ // we start doing CSE across blocks.
+ auto &MBB = MF.front();
+ for (MachineInstr &MI : MBB) {
+ if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+ continue;
+ LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ unsigned Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+ // We are going to invalidate the list of use operands, so we
+ // can't use range iterator.
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+ MOIt != MOItEnd;) {
+ MachineOperand &MOUse = *MOIt++;
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB))
+ continue;
+ LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ MachineInstr &UseMI = *MOUse.getParent();
+ if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+ else
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+ LocalizedMI);
+
+ // Set a new register for the definition.
+ unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ return Changed;
+}
+
+bool Localizer::localizeIntraBlock(
+ SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+ bool Changed = false;
+
+ // For each already-localized instruction which has multiple users, then we
+ // scan the block top down from the current position until we hit one of them.
+
+ // FIXME: Consider doing inst duplication if live ranges are very long due to
+ // many users, but this case may be better served by regalloc improvements.
+
+ for (MachineInstr *MI : LocalizedInstrs) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineBasicBlock &MBB = *MI->getParent();
+ // If the instruction has a single use, we would have already moved it right
+ // before its user in localizeInterBlock().
+ if (MRI->hasOneUse(Reg))
+ continue;
+
+ // All of the user MIs of this reg.
+ SmallPtrSet<MachineInstr *, 32> Users;
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))
+ Users.insert(&UseMI);
+
+ MachineBasicBlock::iterator II(MI);
+ ++II;
+ while (II != MBB.end() && !Users.count(&*II))
+ ++II;
+
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+ << "\n");
+ assert(II != MBB.end() && "Didn't find the user in the MBB");
+ MI->removeFromParent();
+ MBB.insert(II, MI);
+ Changed = true;
+ }
+ return Changed;
+}
+
bool Localizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
init(MF);
- bool Changed = false;
- // Keep track of the instructions we localized.
- // We won't need to process them if we see them later in the CFG.
- SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
- DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
- // TODO: Do bottom up traversal.
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
- continue;
- LLVM_DEBUG(dbgs() << "Should localize: " << MI);
- assert(MI.getDesc().getNumDefs() == 1 &&
- "More than one definition not supported yet");
- unsigned Reg = MI.getOperand(0).getReg();
- // Check if all the users of MI are local.
- // We are going to invalidation the list of use operands, so we
- // can't use range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
- // Check if the use is already local.
- MachineBasicBlock *InsertMBB;
- LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
- dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
- if (isLocalUse(MOUse, MI, InsertMBB))
- continue;
- LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
- Changed = true;
- auto MBBAndReg = std::make_pair(InsertMBB, Reg);
- auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
- if (NewVRegIt == MBBWithLocalDef.end()) {
- // Create the localized instruction.
- MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
- LocalizedInstrs.insert(LocalizedMI);
- // Don't try to be smart for the insertion point.
- // There is no guarantee that the first seen use is the first
- // use in the block.
- InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
- LocalizedMI);
+ // Keep track of the instructions we localized. We'll do a second pass of
+ // intra-block localization to further reduce live ranges.
+ SmallPtrSet<MachineInstr *, 32> LocalizedInstrs;
- // Set a new register for the definition.
- unsigned NewReg =
- MRI->createGenericVirtualRegister(MRI->getType(Reg));
- MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
- LocalizedMI->getOperand(0).setReg(NewReg);
- NewVRegIt =
- MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
- LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
- }
- LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
- << '\n');
- // Update the user reg.
- MOUse.setReg(NewVRegIt->second);
- }
- }
- }
- return Changed;
+ bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+ return Changed |= localizeIntraBlock(LocalizedInstrs);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 67c3707ec5c..10c15a139b4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -165,6 +165,10 @@ public:
return false;
}
+ unsigned getGISelRematGlobalCost() const {
+ return 2;
+ }
+
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
new file mode 100644
index 00000000000..a6c6326fcdd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -o - -verify-machineinstrs -O0 -global-isel -stop-after=localizer %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+@var1 = common global i32 0, align 4
+@var2 = common global i32 0, align 4
+@var3 = common global i32 0, align 4
+@var4 = common global i32 0, align 4
+
+; This is an ll test instead of MIR because -run-pass doesn't seem to support
+; initializing the target TTI which we need for this test.
+
+; Some of the instructions in entry block are dead after this pass so don't
+; strictly need to be checked for.
+
+define i32 @foo() {
+ ; CHECK-LABEL: name: foo
+ ; CHECK: bb.1.entry:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+ ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+ ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+ ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+ ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2.if.then:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+ ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+ ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+ ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+ ; CHECK: G_STORE [[C4]](s32), [[GV4]](p0) :: (store 4 into @var3)
+ ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+ ; CHECK: bb.3.if.end:
+ ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: $w0 = COPY [[C6]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+entry:
+ %0 = load i32, i32* @var1, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 2, i32* @var2, align 4
+ store i32 3, i32* @var1, align 4
+ store i32 2, i32* @var3, align 4
+ store i32 3, i32* @var1, align 4
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
index e4648a868f2..dad2240aa84 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir
@@ -15,6 +15,29 @@
define void @float_non_local_phi_use_followed_by_use_fi() { ret void }
define void @non_local_phi() { ret void }
define void @non_local_label() { ret void }
+
+ @var1 = common global i32 0, align 4
+ @var2 = common global i32 0, align 4
+ @var3 = common global i32 0, align 4
+ @var4 = common global i32 0, align 4
+
+ define i32 @intrablock_with_globalvalue() {
+ entry:
+ %0 = load i32, i32* @var1, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+ if.then:
+ store i32 2, i32* @var2, align 4
+ store i32 3, i32* @var1, align 4
+ store i32 2, i32* @var3, align 4
+ store i32 3, i32* @var1, align 4
+ br label %if.end
+
+ if.end:
+ ret i32 0
+ }
+
...
---
@@ -301,3 +324,67 @@ body: |
%2:fpr(s32) = G_FADD %0, %1
G_BR %bb.1
...
+---
+name: intrablock_with_globalvalue
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: intrablock_with_globalvalue
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+ ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+ ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+ ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+ ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+ ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1.if.then:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+ ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+ ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+ ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+ ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+ ; CHECK: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+ ; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3)
+ ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+ ; CHECK: bb.2.if.end:
+ ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; CHECK: $w0 = COPY [[C6]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+
+ ; Some of these instructions are dead. We're checking that the other instructions are
+ ; sunk immediately before their first user in the if.then block or as close as possible.
+ bb.1.entry:
+ %1:gpr(p0) = G_GLOBAL_VALUE @var1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %4:gpr(s32) = G_CONSTANT i32 2
+ %5:gpr(p0) = G_GLOBAL_VALUE @var2
+ %6:gpr(s32) = G_CONSTANT i32 3
+ %7:gpr(p0) = G_GLOBAL_VALUE @var3
+ %8:gpr(s32) = G_CONSTANT i32 0
+ %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1)
+ %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
+ %3:gpr(s1) = G_TRUNC %9(s32)
+ G_BRCOND %3(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2.if.then:
+ G_STORE %4(s32), %5(p0) :: (store 4 into @var2)
+ G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+ G_STORE %4(s32), %7(p0) :: (store 4 into @var3)
+ G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+
+ bb.3.if.end:
+ $w0 = COPY %8(s32)
+ RET_ReallyLR implicit $w0
+
+...
OpenPOWER on IntegriCloud