diff options
Diffstat (limited to 'llvm/lib')
9 files changed, 314 insertions, 29 deletions
diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index 82f72269d0d..6a5894958e3 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -20,11 +20,14 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyInstrInfo.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp + WebAssemblyOptimizeReturned.cpp + WebAssemblyPeephole.cpp WebAssemblyRegisterInfo.cpp WebAssemblyRegColoring.cpp WebAssemblyRegNumbering.cpp WebAssemblyRegStackify.cpp WebAssemblySelectionDAGInfo.cpp + WebAssemblyStoreResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp WebAssemblyTargetTransformInfo.cpp diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt index 8604528b2fc..bfb124d504e 100644 --- a/llvm/lib/Target/WebAssembly/README.txt +++ b/llvm/lib/Target/WebAssembly/README.txt @@ -22,7 +22,7 @@ Interesting work that remains to be done: //===---------------------------------------------------------------------===// -set_local and store instructions have a return value. We should (a) model this, +set_local instructions have a return value. We should (a) model this, and (b) write optimizations which take advantage of it. Keep in mind that many set_local instructions are implicit! 
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index 59856de9553..001f9f9d4a7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -23,13 +23,17 @@ namespace llvm { class WebAssemblyTargetMachine; class FunctionPass; +FunctionPass *createWebAssemblyOptimizeReturned(); + FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyRegNumbering(); +FunctionPass *createWebAssemblyPeephole(); FunctionPass *createWebAssemblyRelooper(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 3621571151a..af5eab671f2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -74,6 +74,7 @@ private: // Custom lowering hooks. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp new file mode 100644 index 00000000000..dea419c5975 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -0,0 +1,73 @@ +//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Optimize calls with "returned" attributes for WebAssembly. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-returned" + +namespace { +class OptimizeReturned final : public FunctionPass, + public InstVisitor<OptimizeReturned> { + const char *getPassName() const override { + return "WebAssembly Optimize Returned"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + bool runOnFunction(Function &F) override; + + DominatorTree *DT; + +public: + static char ID; + OptimizeReturned() : FunctionPass(ID), DT(nullptr) {} + + void visitCallSite(CallSite CS); +}; +} // End anonymous namespace + +char OptimizeReturned::ID = 0; +FunctionPass *llvm::createWebAssemblyOptimizeReturned() { + return new OptimizeReturned(); +} + +void OptimizeReturned::visitCallSite(CallSite CS) { + for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i) + if (CS.paramHasAttr(1 + i, Attribute::Returned)) { + Instruction *Inst = CS.getInstruction(); + Value *Arg = CS.getArgOperand(i); + // Like replaceDominatedUsesWith but using Instruction/Use dominance. 
+ for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { + Use &U = *UI++; + if (DT->dominates(Inst, U)) + U.set(Inst); + } + } +} + +bool OptimizeReturned::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + visit(F); + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp new file mode 100644 index 00000000000..139956225b9 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -0,0 +1,78 @@ +//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Late peephole optimizations for WebAssembly. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-peephole" + +namespace { +class WebAssemblyPeephole final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly late peephole optimizer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; + WebAssemblyPeephole() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyPeephole::ID = 0; +FunctionPass *llvm::createWebAssemblyPeephole() { + return new WebAssemblyPeephole(); +} + +bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + + for (auto &MBB : MF) + for 
(auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE16_I64: + case WebAssembly::STORE32_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: { + // Store instructions return their value operand. If we ended up using + // the same register for both, replace it with a dead def so that it + // can use $discard instead. + MachineOperand &MO = MI.getOperand(0); + unsigned OldReg = MO.getReg(); + if (OldReg == MI.getOperand(2).getReg()) { + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + MO.setReg(NewReg); + MO.setIsDead(); + MFI.stackifyVReg(NewReg); + Changed = true; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 5ba7c314908..ba2a0e20b2b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -23,6 +23,7 @@ #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -40,6 +41,7 @@ class WebAssemblyRegStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -70,6 +72,24 @@ static void ImposeStackOrdering(MachineInstr *MI) { /*isImp=*/true)); } +// Test whether it's safe to move Def to just before Insert. 
Note that this +// doesn't account for physical register dependencies, because WebAssembly +// doesn't have any (other than special ones like EXPR_STACK). +// TODO: Compute memory dependencies in a way that doesn't require always +// walking the block. +// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be +// more precise. +static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, + AliasAnalysis &AA) { + bool SawStore = false, SawSideEffects = false; + MachineBasicBlock::const_iterator D(Def), I(Insert); + for (--I; I != D; --I) + SawSideEffects |= !I->isSafeToMove(&AA, SawStore); + + return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) && + !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore)); +} + bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Register Stackifying **********\n" "********** Function: " @@ -78,6 +98,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation @@ -90,12 +111,17 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (Insert->getOpcode() == TargetOpcode::PHI) break; + // Don't nest anything inside an inline asm, because we don't have + // constraints for $push inputs. + if (Insert->getOpcode() == TargetOpcode::INLINEASM) + break; + // Iterate through the inputs in reverse order, since we'll be pulling // operands off the stack in FIFO order. bool AnyStackified = false; for (MachineOperand &Op : reverse(Insert->uses())) { // We're only interested in explicit virtual register operands. 
- if (!Op.isReg() || Op.isImplicit()) + if (!Op.isReg() || Op.isImplicit() || !Op.isUse()) continue; unsigned Reg = Op.getReg(); @@ -112,6 +138,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF) continue; + // Don't nest an INLINE_ASM def into anything, because we don't have + // constraints for $pop outputs. + if (Def->getOpcode() == TargetOpcode::INLINEASM) + continue; + + // Don't nest PHIs inside of anything. + if (Def->getOpcode() == TargetOpcode::PHI) + continue; + // Argument instructions represent live-in registers and not real // instructions. if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || @@ -124,8 +159,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // they be trivially clonable. // TODO: Eventually we'll relax this, to take advantage of set_local // returning its result. - bool OneUse = MRI.hasOneUse(Reg); - if (!OneUse && !Def->isMoveImmediate()) + if (!MRI.hasOneUse(Reg)) continue; // For now, be conservative and don't look across block boundaries, @@ -134,35 +168,19 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (Def->getParent() != &MBB && !Def->isMoveImmediate()) continue; - // For now, be simple and don't reorder loads, stores, or side effects. - // TODO: Be more aggressive. - if ((Def->mayLoad() || Def->mayStore() || - Def->hasUnmodeledSideEffects())) + // Don't move instructions that have side effects or memory dependencies + // or other complications. + if (!IsSafeToMove(Def, Insert, AA)) continue; Changed = true; AnyStackified = true; - if (OneUse) { - // Move the def down and nest it in the current instruction. - MBB.insert(MachineBasicBlock::instr_iterator(Insert), - Def->removeFromParent()); - MFI.stackifyVReg(Reg); - ImposeStackOrdering(Def); - Insert = Def; - } else { - // Clone the def down and nest it in the current instruction. 
- MachineInstr *Clone = MF.CloneMachineInstr(Def); - unsigned OldReg = Def->getOperand(0).getReg(); - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - assert(Op.getReg() == OldReg); - assert(Clone->getOperand(0).getReg() == OldReg); - Op.setReg(NewReg); - Clone->getOperand(0).setReg(NewReg); - MBB.insert(MachineBasicBlock::instr_iterator(Insert), Clone); - MFI.stackifyVReg(Reg); - ImposeStackOrdering(Clone); - Insert = Clone; - } + // Move the def down and nest it in the current instruction. + MBB.insert(MachineBasicBlock::instr_iterator(Insert), + Def->removeFromParent()); + MFI.stackifyVReg(Reg); + ImposeStackOrdering(Def); + Insert = Def; } if (AnyStackified) ImposeStackOrdering(&MI); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp new file mode 100644 index 00000000000..d0735b84de6 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -0,0 +1,102 @@ +//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements an optimization pass using store result values. +/// +/// WebAssembly's store instructions return the stored value, specifically to +/// enable the optimization of reducing get_local/set_local traffic, which is +/// what we're doing here. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-store-results" + +namespace { +class WebAssemblyStoreResults final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyStoreResults() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Store Results"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: +}; +} // end anonymous namespace + +char WebAssemblyStoreResults::ID = 0; +FunctionPass *llvm::createWebAssemblyStoreResults() { + return new WebAssemblyStoreResults(); +} + +bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Store Results **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + + for (auto &MBB : MF) + for (auto &MI : MBB) + switch (MI.getOpcode()) { + default: + break; + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE16_I64: + case 
WebAssembly::STORE32_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: + unsigned ToReg = MI.getOperand(0).getReg(); + unsigned FromReg = MI.getOperand(2).getReg(); + for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + if (Where->getOpcode() == TargetOpcode::PHI) + Where = Where->getOperand(&O - &Where->getOperand(0) + 1) + .getMBB() + ->getFirstTerminator(); + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + O.setReg(ToReg); + } + } + + return true; +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index d60c41a2c87..493e4be18dc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -143,6 +143,9 @@ void WebAssemblyPassConfig::addIRPasses() { // control specifically what gets lowered. addPass(createAtomicExpandPass(TM)); + // Optimize "returned" function attributes. + addPass(createWebAssemblyOptimizeReturned()); + TargetPassConfig::addIRPasses(); } @@ -157,6 +160,9 @@ bool WebAssemblyPassConfig::addInstSelector() { bool WebAssemblyPassConfig::addILPOpts() { return true; } void WebAssemblyPassConfig::addPreRegAlloc() { + // Prepare store instructions for register stackifying. + addPass(createWebAssemblyStoreResults()); + // Mark registers as representing wasm's expression stack. addPass(createWebAssemblyRegStackify()); } @@ -183,4 +189,5 @@ void WebAssemblyPassConfig::addPreSched2() {} void WebAssemblyPassConfig::addPreEmitPass() { addPass(createWebAssemblyCFGStackify()); addPass(createWebAssemblyRegNumbering()); + addPass(createWebAssemblyPeephole()); } |