Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/RISCV/CMakeLists.txt              |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.h                     |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp  | 452
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp       |  93
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h         |   9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td           |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td          | 125
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp      |  10
8 files changed, 693 insertions, 3 deletions
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index f8d4e2b9517..ee5ed625f12 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -15,6 +15,7 @@ add_public_tablegen_target(RISCVCommonTableGen) add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp + RISCVExpandPseudoInsts.cpp RISCVFrameLowering.cpp RISCVInstrInfo.cpp RISCVISelDAGToDAG.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 2e4f536aca3..b48a68f76eb 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_RISCV_RISCV_H #include "MCTargetDesc/RISCVBaseInfo.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { class RISCVTargetMachine; @@ -36,6 +37,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); FunctionPass *createRISCVMergeBaseOffsetOptPass(); void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); + +FunctionPass *createRISCVExpandPseudoPass(); +void initializeRISCVExpandPseudoPass(PassRegistry &); } #endif diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp new file mode 100644 index 00000000000..1c23680abc1 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -0,0 +1,452 @@ +//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVTargetMachine.h" + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass" + +namespace { + +class RISCVExpandPseudo : public MachineFunctionPass { +public: + const RISCVInstrInfo *TII; + static char ID; + + RISCVExpandPseudo() : MachineFunctionPass(ID) { + initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicBinOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp, + bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicMinMaxOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI); +}; + +char RISCVExpandPseudo::ID = 0; + +bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo()); + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + +bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case RISCV::PseudoAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicSwap32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicLoadAdd32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI); + case RISCV::PseudoMaskedAtomicLoadSub32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI); + case RISCV::PseudoMaskedAtomicLoadNand32: + return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicLoadMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicLoadMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicLoadUMax32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32, + NextMBBI); + case RISCV::PseudoMaskedAtomicLoadUMin32: + return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32, + NextMBBI); + } + + return false; +} + +static unsigned getLRForRMW32(AtomicOrdering Ordering) { + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::LR_W; + case AtomicOrdering::Acquire: + return RISCV::LR_W_AQ; + case AtomicOrdering::Release: + return 
RISCV::LR_W; + case AtomicOrdering::AcquireRelease: + return RISCV::LR_W_AQ; + case AtomicOrdering::SequentiallyConsistent: + return RISCV::LR_W_AQ_RL; + } +} + +static unsigned getSCForRMW32(AtomicOrdering Ordering) { + switch (Ordering) { + default: + llvm_unreachable("Unexpected AtomicOrdering"); + case AtomicOrdering::Monotonic: + return RISCV::SC_W; + case AtomicOrdering::Acquire: + return RISCV::SC_W; + case AtomicOrdering::Release: + return RISCV::SC_W_RL; + case AtomicOrdering::AcquireRelease: + return RISCV::SC_W_RL; + case AtomicOrdering::SequentiallyConsistent: + return RISCV::SC_W_AQ_RL; + } +} + +static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, + DebugLoc DL, MachineBasicBlock *ThisMBB, + MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, + AtomicRMWInst::BinOp BinOp, int Width) { + assert(Width == 32 && "RV64 atomic expansion currently unsupported"); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned ScratchReg = MI.getOperand(1).getReg(); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned IncrReg = MI.getOperand(3).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(4).getImm()); + + // .loop: + // lr.w dest, (addr) + // binop scratch, dest, val + // sc.w scratch, scratch, (addr) + // bnez scratch, loop + BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + .addReg(AddrReg); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg) + .addReg(ScratchReg) + .addImm(-1); + break; + } + BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + .addReg(AddrReg) + .addReg(ScratchReg); + BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopMBB); +} + +static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL, + MachineBasicBlock *MBB, unsigned DestReg, + unsigned OldValReg, unsigned NewValReg, + unsigned MaskReg, unsigned ScratchReg) { + assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique"); + assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique"); + assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique"); + + // We select bits from newval and oldval using: + // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge + // r = oldval ^ ((oldval ^ newval) & masktargetdata); + BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg) + .addReg(OldValReg) + .addReg(NewValReg); + BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(ScratchReg) + .addReg(MaskReg); + BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg) + .addReg(OldValReg) + .addReg(ScratchReg); +} + +static void doMaskedAtomicBinOpExpansion( + const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL, + MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, + MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { + assert(Width == 32 && "RV64 atomic expansion currently unsupported"); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned ScratchReg = MI.getOperand(1).getReg(); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned IncrReg = MI.getOperand(3).getReg(); + unsigned MaskReg = MI.getOperand(4).getReg(); + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(5).getImm()); + + // .loop: + // lr.w destreg, (alignedaddr) + // binop scratch, destreg, 
incr + // xor scratch, destreg, scratch + // and scratch, scratch, masktargetdata + // xor scratch, destreg, scratch + // sc.w scratch, scratch, (alignedaddr) + // bnez scratch, loop + BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + .addReg(AddrReg); + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg) + .addReg(RISCV::X0) + .addReg(IncrReg); + break; + case AtomicRMWInst::Add: + BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Sub: + BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + break; + case AtomicRMWInst::Nand: + BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) + .addReg(DestReg) + .addReg(IncrReg); + BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg) + .addReg(ScratchReg) + .addImm(-1); + break; + } + + insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg, + ScratchReg); + + BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg) + .addReg(AddrReg) + .addReg(ScratchReg); + BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) + .addReg(ScratchReg) + .addReg(RISCV::X0) + .addMBB(LoopMBB); +} + +bool RISCVExpandPseudo::expandAtomicBinOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + MachineFunction *MF = MBB.getParent(); + auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopMBB); + MF->insert(++LoopMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. 
+ LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopMBB); + + if (!IsMasked) + doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width); + else + doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, + Width); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + +static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL, + MachineBasicBlock *MBB, unsigned ValReg, + unsigned ShamtReg) { + BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg) + .addReg(ValReg) + .addReg(ShamtReg); + BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg) + .addReg(ValReg) + .addReg(ShamtReg); +} + +bool RISCVExpandPseudo::expandAtomicMinMaxOp( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width, + MachineBasicBlock::iterator &NextMBBI) { + assert(IsMasked == true && + "Should only need to expand masked atomic max/min"); + assert(Width == 32 && "RV64 atomic expansion currently unsupported"); + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs. + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); + MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); + MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopIfBodyMBB); + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopIfBodyMBB->addSuccessor(LoopTailMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + LoopTailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Scratch1Reg = MI.getOperand(1).getReg(); + unsigned Scratch2Reg = MI.getOperand(2).getReg(); + unsigned AddrReg = MI.getOperand(3).getReg(); + unsigned IncrReg = MI.getOperand(4).getReg(); + unsigned MaskReg = MI.getOperand(5).getReg(); + bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max; + AtomicOrdering Ordering = + static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 
7 : 6).getImm()); + + // + // .loophead: + // lr.w destreg, (alignedaddr) + // and scratch2, destreg, mask + // mv scratch1, destreg + // [sext scratch2 if signed min/max] + // ifnochangeneeded scratch2, incr, .looptail + BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg) + .addReg(AddrReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg) + .addReg(DestReg) + .addReg(MaskReg); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg) + .addReg(DestReg) + .addImm(0); + + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Max: { + insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg()); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(Scratch2Reg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::Min: { + insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg()); + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE)) + .addReg(IncrReg) + .addReg(Scratch2Reg) + .addMBB(LoopTailMBB); + break; + } + case AtomicRMWInst::UMax: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(Scratch2Reg) + .addReg(IncrReg) + .addMBB(LoopTailMBB); + break; + case AtomicRMWInst::UMin: + BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU)) + .addReg(IncrReg) + .addReg(Scratch2Reg) + .addMBB(LoopTailMBB); + break; + } + + // .loopifbody: + // xor scratch1, destreg, incr + // and scratch1, scratch1, mask + // xor scratch1, destreg, scratch1 + insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg, + MaskReg, Scratch1Reg); + + // .looptail: + // sc.w scratch1, scratch1, (addr) + // bnez scratch1, loop + BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg) + .addReg(AddrReg) + .addReg(Scratch1Reg); + BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) + .addReg(Scratch1Reg) + .addReg(RISCV::X0) + .addMBB(LoopHeadMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + +} // end of anonymous namespace + +INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo", + RISCV_EXPAND_PSEUDO_NAME, false, false) +namespace llvm { + +FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); } + +} // end of namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6f2e6b40038..44da49c23d5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -137,10 +137,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); - if (Subtarget.hasStdExtA()) + if (Subtarget.hasStdExtA()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); - else + setMinCmpXchgSizeInBits(32); + } else { setMaxAtomicSizeInBitsSupported(0); + } setBooleanContents(ZeroOrOneBooleanContent); @@ -160,6 +162,33 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const { + switch (Intrinsic) { + default: + return false; + case Intrinsic::riscv_masked_atomicrmw_xchg_i32: + case 
Intrinsic::riscv_masked_atomicrmw_add_i32: + case Intrinsic::riscv_masked_atomicrmw_sub_i32: + case Intrinsic::riscv_masked_atomicrmw_nand_i32: + case Intrinsic::riscv_masked_atomicrmw_max_i32: + case Intrinsic::riscv_masked_atomicrmw_min_i32: + case Intrinsic::riscv_masked_atomicrmw_umax_i32: + case Intrinsic::riscv_masked_atomicrmw_umin_i32: + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = 4; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; + return true; + } +} + bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, @@ -1596,3 +1625,63 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, return Builder.CreateFence(AtomicOrdering::Acquire); return nullptr; } + +TargetLowering::AtomicExpansionKind +RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + if (Size == 8 || Size == 16) + return AtomicExpansionKind::MaskedIntrinsic; + return AtomicExpansionKind::None; +} + +static Intrinsic::ID +getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::riscv_masked_atomicrmw_xchg_i32; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add_i32; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub_i32; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand_i32; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max_i32; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min_i32; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax_i32; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin_i32; + } +} + +Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( + IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, + Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { + Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering())); + Type *Tys[] = {AlignedAddr->getType()}; + Function *LrwOpScwLoop = Intrinsic::getDeclaration( + AI->getModule(), + getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys); + + // Must pass the shift amount needed to sign extend the loaded value prior + // to performing a signed comparison for min/max. ShiftAmt is the number of + // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which + // is the number of bits to left+right shift the value in order to + // sign-extend. 
+ if (AI->getOperation() == AtomicRMWInst::Min || + AI->getOperation() == AtomicRMWInst::Max) { + const DataLayout &DL = AI->getModule()->getDataLayout(); + unsigned ValWidth = + DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); + Value *SextShamt = Builder.CreateSub( + Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt); + return Builder.CreateCall(LrwOpScwLoop, + {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + } + + return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); +} diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3e3e67b9188..e21951140b3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -43,6 +43,9 @@ public: explicit RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI); + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I = nullptr) const override; @@ -115,6 +118,12 @@ private: bool IsEligibleForTailCallOptimization(CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVector<CCValAssign, 16> &ArgLocs) const; + + TargetLowering::AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + virtual Value *emitMaskedAtomicRMWIntrinsic( + IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, + Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override; }; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 720dd78c4ac..dd739f03f9b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -603,7 +603,7 @@ def : MnemonicAlias<"sbreak", "ebreak">; /// Generic pattern classes -class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst> +class PatGprGpr<SDPatternOperator OpNode, RVInst Inst> : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>; class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst> : Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index ef46892b9ff..bf8c835918a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -105,4 +105,129 @@ defm : LdPat<atomic_load_32, LW>; defm : AtomicStPat<atomic_store_8, SB, GPR>; defm : AtomicStPat<atomic_store_16, SH, GPR>; defm : AtomicStPat<atomic_store_32, SW, GPR>; + +/// AMOs + +multiclass AMOPat<string AtomicOp, string BaseInst> { + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst)>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst#"_AQ")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst#"_RL")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst#"_AQ_RL")>; + def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst#"_AQ_RL")>; +} + +defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">; +defm : AMOPat<"atomic_load_add_32", "AMOADD_W">; +defm : AMOPat<"atomic_load_and_32", "AMOAND_W">; +defm : AMOPat<"atomic_load_or_32", "AMOOR_W">; +defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">; +defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">; +defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; +defm : AMOPat<"atomic_load_umax_32", 
"AMOMAXU_W">; +defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; + +def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr), + (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr), + (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr), + (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr), + (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; +def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr), + (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>; + +/// Pseudo AMOs + +class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$incr, i32imm:$ordering), []> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} + +def PseudoAtomicLoadNand32 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. +def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; + +class PseudoMaskedAMO + : Pseudo<(outs GPR:$res, GPR:$scratch), + (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} + +class PseudoMaskedAMOMinMax + : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), + (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$sextshamt, + i32imm:$ordering), []> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," + "@earlyclobber $scratch2"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} + +class PseudoMaskedAMOUMinUMax + : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2), + (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> { + let Constraints = "@earlyclobber $res,@earlyclobber $scratch1," + "@earlyclobber $scratch2"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; +} + +class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst> + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering), + (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; + +class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> + : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, + imm:$ordering), + (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, + imm:$ordering)>; + +def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, + PseudoMaskedAtomicSwap32>; +def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32, + PseudoMaskedAtomicLoadAdd32>; +def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32, + PseudoMaskedAtomicLoadSub32>; +def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32, + PseudoMaskedAtomicLoadNand32>; +def 
PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32, + PseudoMaskedAtomicLoadMax32>; +def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32, + PseudoMaskedAtomicLoadMin32>; +def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32, + PseudoMaskedAtomicLoadUMax32>; +def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, + PseudoMaskedAtomicLoadUMin32>; } // Predicates = [HasStdExtA] diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index a2ebf5bf3e6..e75da768ee2 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -27,6 +27,8 @@ using namespace llvm; extern "C" void LLVMInitializeRISCVTarget() { RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target()); RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target()); + auto PR = PassRegistry::getPassRegistry(); + initializeRISCVExpandPseudoPass(*PR); } static std::string computeDataLayout(const Triple &TT) { @@ -78,6 +80,7 @@ public: void addIRPasses() override; bool addInstSelector() override; void addPreEmitPass() override; + void addPreEmitPass2() override; void addPreRegAlloc() override; }; } @@ -99,6 +102,13 @@ bool RISCVPassConfig::addInstSelector() { void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } +void RISCVPassConfig::addPreEmitPass2() { + // Schedule the expansion of AMOs at the last possible moment, avoiding the + // possibility for other passes to break the requirements for forward + // progress in the LR/SC block. + addPass(createRISCVExpandPseudoPass()); +} + void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVMergeBaseOffsetOptPass()); } |
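
To see both new lowering paths end to end, a small C++ translation unit (a hypothetical smoke test, not part of this patch) can be compiled for a riscv32 target with the A extension (e.g. clang --target=riscv32 -march=rv32ia -O2): the 32-bit nand goes through PseudoAtomicLoadNand32, while the 8-bit fetch_add is narrower than the native AMO width and takes the masked-intrinsic route added in RISCVISelLowering.

// Hypothetical smoke test exercising the two expansion paths in this patch.
#include <atomic>
#include <cstdint>

int32_t nand_word(int32_t *p, int32_t v) {
  // __atomic_fetch_nand is a GCC/Clang builtin; std::atomic has no nand, so
  // this is the easiest way to reach PseudoAtomicLoadNand32.
  return __atomic_fetch_nand(p, v, __ATOMIC_SEQ_CST);
}

uint8_t add_byte(std::atomic<uint8_t> &a) {
  // An 8-bit atomicrmw is expanded by AtomicExpandPass through the
  // riscv_masked_atomicrmw_add_i32 intrinsic, i.e. PseudoMaskedAtomicLoadAdd32.
  return a.fetch_add(1, std::memory_order_acq_rel);
}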
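
The XOR/AND/XOR sequence emitted by insertMaskedMerge is the masked-merge trick cited from the Stanford bit-hacks page; a standalone sketch (illustrative only) shows that r = old ^ ((old ^ new) & mask) indeed takes the new bits under the mask and keeps the old bits everywhere else.

// Illustrative check of the masked-merge identity used by insertMaskedMerge.
#include <cassert>
#include <cstdint>

static uint32_t maskedMerge(uint32_t oldVal, uint32_t newVal, uint32_t mask) {
  uint32_t scratch = (oldVal ^ newVal) & mask; // XOR then AND, as in the pass
  return oldVal ^ scratch;                     // final XOR yields the merge
}

int main() {
  uint32_t oldVal = 0x12345678, newVal = 0xABCDEF01, mask = 0x0000FF00;
  assert(maskedMerge(oldVal, newVal, mask) ==
         ((oldVal & ~mask) | (newVal & mask)));
  return 0;
}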
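
The sextshamt operand is computed in emitMaskedAtomicRMWIntrinsic as XLen - ShiftAmt - ValWidth and consumed by insertSext as an SLL/SRA pair before the signed BGE in the min/max loop head. A C-level analogue (hypothetical, and assuming the usual arithmetic right shift for signed values) shows why that amount sign-extends the masked field without moving it out of position.

// Sign-extend a ValWidth-bit field sitting ShiftAmt bits up in a 32-bit word,
// mirroring insertSext's SLL/SRA with shamt = XLen - ShiftAmt - ValWidth.
#include <cassert>
#include <cstdint>

static int32_t sextFieldInPlace(uint32_t masked, unsigned ShiftAmt,
                                unsigned ValWidth) {
  const unsigned XLen = 32;
  unsigned SextShamt = XLen - ShiftAmt - ValWidth;
  int32_t v = static_cast<int32_t>(masked << SextShamt); // SLL
  return v >> SextShamt;                                  // SRA (arithmetic)
}

int main() {
  // The i8 value 0x80 (-128) stored in byte 2 of the word, already masked.
  uint32_t masked = 0x00800000u;
  // After the shift pair the field keeps its 2^16 scaling but gains sign bits,
  // so a signed compare against the equally positioned incr operand is valid.
  assert(sextFieldInPlace(masked, /*ShiftAmt=*/16, /*ValWidth=*/8) ==
         static_cast<int32_t>(0xFF800000u));
  return 0;
}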
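
Finally, the hard-coded immediates in the PseudoAtomicLoadNand32 patterns (2, 4, 5, 6, 7) are the numeric values of llvm::AtomicOrdering, which the TableGen comment says must stay in sync with AtomicOrdering.h; the layout below is a reminder reproduced from the enum of this era, not part of the patch.

// Numbering of llvm::AtomicOrdering assumed by the pseudo-AMO patterns; the
// expansion pass reads these back with getImm() and maps them onto aq/rl.
enum class AtomicOrdering {
  NotAtomic = 0,
  Unordered = 1,
  Monotonic = 2,               // "_monotonic" patterns pass 2
  Consume = 3,                 // not used by the patterns
  Acquire = 4,                 // "_acquire" patterns pass 4
  Release = 5,                 // "_release" patterns pass 5
  AcquireRelease = 6,          // "_acq_rel" patterns pass 6
  SequentiallyConsistent = 7   // "_seq_cst" patterns pass 7
};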