//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines an instruction selector for the RISCV target. // //===----------------------------------------------------------------------===// #include "RISCV.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "RISCVTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "riscv-isel" // RISCV-specific code to select RISCV machine instructions for // SelectionDAG operations. namespace { class RISCVDAGToDAGISel final : public SelectionDAGISel { const RISCVSubtarget *Subtarget; public: explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) : SelectionDAGISel(TargetMachine) {} StringRef getPassName() const override { return "RISCV DAG->DAG Pattern Instruction Selection"; } bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } void PostprocessISelDAG() override; void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; bool SelectAddrFI(SDValue Addr, SDValue &Base); // Include the pieces autogenerated from the target description. #include "RISCVGenDAGISel.inc" private: void doPeepholeLoadStoreADDI(); void doPeepholeGlobalAddiLuiOffset(); void doPeepholeBuildPairF64SplitF64(); }; } void RISCVDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); doPeepholeGlobalAddiLuiOffset(); doPeepholeBuildPairF64SplitF64(); } void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); MVT XLenVT = Subtarget->getXLenVT(); // If we have a custom node, we have already selected if (Node->isMachineOpcode()) { LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); Node->setNodeId(-1); return; } // Instruction Selection not handled by the auto-generated tablegen selection // should be handled here. EVT VT = Node->getValueType(0); if (Opcode == ISD::Constant && VT == XLenVT) { auto *ConstNode = cast(Node); // Materialize zero constants as copies from X0. This allows the coalescer // to propagate these into other instructions. if (ConstNode->isNullValue()) { SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), RISCV::X0, XLenVT); ReplaceNode(Node, New.getNode()); return; } } if (Opcode == ISD::FrameIndex) { SDLoc DL(Node); SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); int FI = cast(Node)->getIndex(); EVT VT = Node->getValueType(0); SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); return; } // Select the default instruction. SelectCode(Node); } bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { switch (ConstraintID) { case InlineAsm::Constraint_i: case InlineAsm::Constraint_m: // We just support simple memory operands that have a single address // operand and need no special handling. OutOps.push_back(Op); return false; default: break; } return true; } bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { if (auto FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); return true; } return false; } // Detect the pattern lui %hi(global) --> ADDI %lo(global) // HiLUI LoADDI static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI, SDValue &HiLUI, GlobalAddressSDNode *&GAlo, GlobalAddressSDNode *&GAhi) { // Try to detect the pattern on every operand of the tail instruction. for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) { LoADDI = Tail->getOperand(Idx); // LoADDI should only be used by one instruction (Tail). if (!LoADDI->isMachineOpcode() || !(LoADDI->getMachineOpcode() == RISCV::ADDI) || !isa(LoADDI->getOperand(1)) || !LoADDI->hasOneUse()) continue; // Check for existence of %lo target flag. GAlo = cast(LoADDI->getOperand(1)); if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) || !(GAlo->getOffset() == 0)) return false; // Check for existence of %hi target flag. HiLUI = LoADDI->getOperand(0); if (!HiLUI->isMachineOpcode() || !(HiLUI->getMachineOpcode() == RISCV::LUI) || !isa(HiLUI->getOperand(0)) || !HiLUI->hasOneUse()) return false; GAhi = cast(HiLUI->getOperand(0)); if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) || !(GAhi->getOffset() == 0)) return false; return true; } return false; } static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) { if (!OffsetLUI->isMachineOpcode() || !(OffsetLUI->getMachineOpcode() == RISCV::LUI) || !isa(OffsetLUI->getOperand(0))) return false; Offset = cast(OffsetLUI->getOperand(0))->getSExtValue(); Offset = Offset << 12; LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n"); return true; } static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) { // LoADDI should only be used by the tail instruction only. if (!OffsetLoADDI->isMachineOpcode() || !(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) || !isa(OffsetLoADDI->getOperand(1)) || !OffsetLoADDI->hasOneUse()) return false; int64_t OffLo = cast(OffsetLoADDI->getOperand(1))->getZExtValue(); // HiLUI should only be used by the loADDI. SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0)); if (!OffsetHiLUI->isMachineOpcode() || !(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) || !isa(OffsetHiLUI->getOperand(0)) || !OffsetHiLUI->hasOneUse()) return false; int64_t OffHi = cast(OffsetHiLUI->getOperand(0))->getSExtValue(); Offset = (OffHi << 12) + OffLo; LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n"); return true; } static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG, GlobalAddressSDNode *GAhi, GlobalAddressSDNode *GAlo, SDValue &GlobalHiLUI, SDValue &GlobalLoADDI, int64_t Offset) { // Update the offset in GAhi and GAlo. SDLoc DL(Tail->getOperand(1)); SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL, GlobalHiLUI.getValueType(), Offset, RISCVII::MO_HI); SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL, GlobalLoADDI.getValueType(), Offset, RISCVII::MO_LO); CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew); CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew); // Update all uses of the Tail with the GlobalLoADDI. After // this Tail will be a dead node. SDValue From = SDValue(Tail, 0); CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1); } // TODO: This transformation might be better implemeted in a Machine Funtion // Pass as discussed here: https://reviews.llvm.org/D45748. // // Merge the offset of address calculation into the offset field // of a global address node in a global address lowering sequence ("LUI // %hi(global) --> add %lo(global)") under the following conditions: 1) The // offset field in the global address lowering sequence is zero. 2) The lowered // global address is only used in one node, referred to as "Tail". // This peephole does the following transformations to merge the offset: // 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset // ---> // ADDI (LUI %hi(global + offset)) %lo(global + offset). // // This generates: // lui a0, hi (global + offset) // add a0, a0, lo (global + offset) // Instead of // lui a0, hi (global) // addi a0, hi (global) // addi a0, offset // This pattern is for cases when the offset is small enough to fit in the // immediate filed of ADDI (less than 12 bits). // 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset)) // ---> // offset = hi_offset << 12 // ADDI (LUI %hi(global + offset)) %lo(global + offset) // Which generates the ASM: // lui a0, hi(global + offset) // addi a0, lo(global + offset) // Instead of: // lui a0, hi(global) // addi a0, lo(global) // lui a1, (offset) // add a0, a0, a1 // This pattern is for cases when the offset doesn't fit in an immediate field // of ADDI but the lower 12 bits are all zeros. // 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI // hi_offset))) // ---> // ADDI (LUI %hi(global + offset)) %lo(global + offset) // Which generates the ASM: // lui a1, %hi(global + offhi20<<12 + offlo12) // addi a1, %lo(global + offhi20<<12 + offlo12) // Instead of: // lui a0, hi(global) // addi a0, lo(global) // lui a1, (offhi20) // addi a1, (offlo12) // add a0, a0, a1 // This pattern is for cases when the offset doesn't fit in an immediate field // of ADDI and both the lower 1 bits and high 20 bits are non zero. void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() { SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); ++Position; SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin()); while (Position != Begin) { SDNode *Tail = &*--Position; // Skip dead nodes and any non-machine opcodes. if (Tail->use_empty() || !Tail->isMachineOpcode()) continue; // The tail instruction can be an ADD or an ADDI. if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD || Tail->getMachineOpcode() == RISCV::ADDI)) continue; // First detect the global address part of pattern: // (lui %hi(global) --> Addi %lo(global)) unsigned GlobalLoADDiIdx; SDValue GlobalLoADDI; SDValue GlobalHiLUI; GlobalAddressSDNode *GAhi; GlobalAddressSDNode *GAlo; if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI, GAlo, GAhi)) continue; LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n"); // Detect the offset part for the address calculation by looking at the // other operand of the tail instruction: int64_t Offset; if (Tail->getMachineOpcode() == RISCV::ADD) { // If the Tail is an ADD instruction, the offset can be in two forms: // 1) LUI hi_Offset followed by: // ADDI lo_offset // This happens in case the offset has non zero bits in // both hi 20 and lo 12 bits. // 2) LUI (offset20) // This happens in case the lower 12 bits of the offset are zeros. SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx); if (!matchAddiLuiOffset(OffsetVal, Offset) && !matchLuiOffset(OffsetVal, Offset)) continue; } else // The Tail is an ADDI instruction: Offset = cast(Tail->getOperand(1 - GlobalLoADDiIdx)) ->getSExtValue(); LLVM_DEBUG( dbgs() << " Fold offset value into global offset of LUI %hi and ADDI %lo\n"); LLVM_DEBUG(dbgs() << "\tTail:"); LLVM_DEBUG(Tail->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:"); LLVM_DEBUG(GlobalHiLUI->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:"); LLVM_DEBUG(GlobalLoADDI->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI, Offset); } CurDAG->RemoveDeadNodes(); } // Merge an ADDI into the offset of a load/store instruction where possible. // (load (add base, off), 0) -> (load base, off) // (store val, (add base, off)) -> (store val, base, off) void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() { SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); ++Position; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; int OffsetOpIdx; int BaseOpIdx; // Only attempt this optimisation for I-type loads and S-type stores. switch (N->getMachineOpcode()) { default: continue; case RISCV::LB: case RISCV::LH: case RISCV::LW: case RISCV::LBU: case RISCV::LHU: case RISCV::LWU: case RISCV::LD: case RISCV::FLW: case RISCV::FLD: BaseOpIdx = 0; OffsetOpIdx = 1; break; case RISCV::SB: case RISCV::SH: case RISCV::SW: case RISCV::SD: case RISCV::FSW: case RISCV::FSD: BaseOpIdx = 1; OffsetOpIdx = 2; break; } // Currently, the load/store offset must be 0 to be considered for this // peephole optimisation. if (!isa(N->getOperand(OffsetOpIdx)) || N->getConstantOperandVal(OffsetOpIdx) != 0) continue; SDValue Base = N->getOperand(BaseOpIdx); // If the base is an ADDI, we can merge it in to the load/store. if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) continue; SDValue ImmOperand = Base.getOperand(1); if (auto Const = dyn_cast(ImmOperand)) { ImmOperand = CurDAG->getTargetConstant( Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType()); } else if (auto GA = dyn_cast(ImmOperand)) { ImmOperand = CurDAG->getTargetGlobalAddress( GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), GA->getOffset(), GA->getTargetFlags()); } else { continue; } LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); LLVM_DEBUG(Base->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nN: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); // Modify the offset operand of the load/store. if (BaseOpIdx == 0) // Load CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, N->getOperand(2)); else // Store CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), ImmOperand, N->getOperand(3)); // The add-immediate may now be dead, in which case remove it. if (Base.getNode()->use_empty()) CurDAG->RemoveDeadNode(Base.getNode()); } } // Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is // built of two i32 values, only to be split apart again. This must be done // here as a peephole optimisation as the DAG has not been fully legalized at // the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so // some nodes would not yet have been replaced with libcalls. void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() { SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); ++Position; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any nodes other than SplitF64Pseudo. if (N->use_empty() || !N->isMachineOpcode() || !(N->getMachineOpcode() == RISCV::SplitF64Pseudo)) continue; // If the operand to SplitF64 is a BuildPairF64, the split operation is // redundant. Just use the operands to BuildPairF64 as the result. SDValue F64Val = N->getOperand(0); if (F64Val.isMachineOpcode() && F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) { LLVM_DEBUG( dbgs() << "Removing redundant SplitF64Pseudo and replacing uses " "with BuildPairF64Pseudo operands:\n"); LLVM_DEBUG(dbgs() << "N: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "F64Val: "); LLVM_DEBUG(F64Val->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); SDValue From[] = {SDValue(N, 0), SDValue(N, 1)}; SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)}; CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); } } CurDAG->RemoveDeadNodes(); } // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { return new RISCVDAGToDAGISel(TM); }