diff options
| author | Jingyue Wu <jingyue@google.com> | 2015-07-22 04:16:52 +0000 |
|---|---|---|
| committer | Jingyue Wu <jingyue@google.com> | 2015-07-22 04:16:52 +0000 |
| commit | 20d73c6cc0e0010edd5e3239b78225388380f0d6 (patch) | |
| tree | 35f83c1995fb9de73d28a384ec501af3c0925a6b | |
| parent | e5407178d6789ccf462dbb39ad0acc5cbde6581c (diff) | |
| download | bcm5719-llvm-20d73c6cc0e0010edd5e3239b78225388380f0d6.tar.gz bcm5719-llvm-20d73c6cc0e0010edd5e3239b78225388380f0d6.zip | |
[BranchFolding] do not iterate the aliases of virtual registers
Summary:
MCRegAliasIterator only works for physical registers. So, do not run it
on virtual registers.
With this issue fixed, we can resurrect the BranchFolding pass in NVPTX
backend.
Reviewers: jholewinski, bkramer
Subscribers: henryhu, meheff, llvm-commits, jholewinski
Differential Revision: http://reviews.llvm.org/D11174
llvm-svn: 242871
| -rw-r--r-- | llvm/lib/CodeGen/BranchFolding.cpp | 43 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/branch-fold.ll | 24 |
3 files changed, 52 insertions, 16 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 618266731c0..608f021b035 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -12,7 +12,8 @@ // it then removes. // // Note that this pass must be run after register allocation, it cannot handle -// SSA form. +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). // //===----------------------------------------------------------------------===// @@ -1573,6 +1574,17 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, return nullptr; } +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + /// findHoistingInsertPosAndDeps - Find the location to move common instructions /// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous @@ -1598,8 +1610,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1608,8 +1619,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // If the terminator defines a register, make sure we don't hoist // the instruction whose def might be clobbered by the terminator. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1665,15 +1675,15 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Uses.erase(*SubRegs); // Use sub-registers to be conservative + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } } - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1800,8 +1810,12 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } } // Track local defs so we can update liveins. @@ -1813,8 +1827,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.insert(*AI); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); } HasDups = true; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 20dfc2a5370..c6944845a15 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -163,7 +163,6 @@ void NVPTXPassConfig::addIRPasses() { // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineCopyPropagationID); - disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); addPass(createNVPTXImageOptimizerPass()); diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.ll b/llvm/test/CodeGen/NVPTX/branch-fold.ll new file mode 100644 index 00000000000..305235be688 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/branch-fold.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s +; Disable CGP which also folds branches, so that only BranchFolding is under +; the spotlight. + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +define void @foo(i32 %x, float* %output) { +; CHECK-LABEL: .visible .func foo( +; CHECK-NOT: bra.uni +; CHECK-NOT: LBB0_ + %1 = icmp eq i32 %x, 1 + br i1 %1, label %then, label %else + +then: + br label %merge + +else: + br label %merge + +merge: + store float 2.0, float* %output + ret void +} |

