author    Alex Bradbury <asb@lowrisc.org>    2018-09-19 10:54:22 +0000
committer Alex Bradbury <asb@lowrisc.org>    2018-09-19 10:54:22 +0000
commit    21aea51e71614b15545c69f84310b938520b069c (patch)
tree      628636b1c733777c5152513fba3c7bc2da48b37a /llvm/lib
parent    e8d8aee537d4c2ecee173a46d14b7720b200bf64 (diff)
[RISCV] Codegen for i8, i16, and i32 atomicrmw with RV32A
Introduce a new RISCVExpandPseudoInsts pass to expand atomic pseudo-instructions after register allocation. This is necessary in order to ensure that register spills aren't introduced between LL and SC, thus breaking the forward progress guarantee for the operation. AArch64 does something similar for CmpXchg (though only at O0), and Mips is moving towards this approach (see D31287). See also [this mailing list post](http://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html) from James Knight, which summarises the issues with lowering to ll/sc in IR or pre-RA.

See the [accompanying RFC thread](http://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html) for an overview of the lowering strategy.

Differential Revision: https://reviews.llvm.org/D47882

llvm-svn: 342534
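To make the lowering strategy concrete, here is a rough sketch (hypothetical, not taken from this patch or its tests; names and the exact intrinsic mangling are illustrative): an i8 atomicrmw is rewritten by AtomicExpandPass into a call to a masked RISC-V intrinsic that operates on the containing aligned 32-bit word, the intrinsic is selected to a pseudo-instruction, and RISCVExpandPseudoInsts expands that pseudo into an LR.W/SC.W loop after register allocation.

```llvm
; Hypothetical input (not from this commit's test suite): a sub-word atomicrmw
; that RV32A cannot express with a single AMO instruction.
define i8 @rmw_add_i8(i8* %ptr, i8 %val) {
  %old = atomicrmw add i8* %ptr, i8 %val seq_cst
  ret i8 %old
}
; AtomicExpandPass conceptually rewrites this into (names illustrative only):
;   %aligned = <%ptr rounded down to a 4-byte boundary, as i32*>
;   %shamt   = <bit offset of the target byte within that word>
;   %incr    = shl i32 (zext i8 %val to i32), %shamt
;   %mask    = shl i32 255, %shamt
;   %wide    = call i32 @llvm.riscv.masked.atomicrmw.add.i32.p0i32(
;                  i32* %aligned, i32 %incr, i32 %mask, i32 7)  ; 7 = seq_cst
;   %old     = trunc i32 (lshr i32 %wide, %shamt) to i8
; The call is then selected to PseudoMaskedAtomicLoadAdd32, which the new
; RISCVExpandPseudoInsts pass expands into an LR.W/SC.W loop after regalloc.
```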
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp            |  38
-rw-r--r--  llvm/lib/Target/RISCV/CMakeLists.txt             |   1
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.h                    |   4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 452
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp      |  93
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h        |   9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td          |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td         | 125
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp     |  10
9 files changed, 730 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index b55afed2d68..3a283ca7048 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -90,6 +90,7 @@ namespace {
TargetLoweringBase::AtomicExpansionKind ExpansionKind);
AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
static Value *insertRMWCmpXchgLoop(
@@ -411,8 +412,9 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
- llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -574,6 +576,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
}
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
+ expandAtomicRMWToMaskedIntrinsic(AI);
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -662,6 +668,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
IRBuilder<> &Builder, Value *Loaded,
Value *Shifted_Inc, Value *Inc,
const PartwordMaskValues &PMV) {
+ // TODO: update to use
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
+ // to merge bits from two values without requiring PMV.Inv_Mask.
switch (Op) {
case AtomicRMWInst::Xchg: {
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
@@ -914,6 +923,33 @@ void AtomicExpand::expandAtomicOpToLLSC(
I->eraseFromParent();
}
+void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ // The value operand must be sign-extended for signed min/max so that the
+ // target's signed comparison instructions can be used. Otherwise, just
+ // zero-ext.
+ Instruction::CastOps CastOp = Instruction::ZExt;
+ AtomicRMWInst::BinOp RMWOp = AI->getOperation();
+ if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
+ CastOp = Instruction::SExt;
+
+ Value *ValOperand_Shifted = Builder.CreateShl(
+ Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+ Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
+ Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
+ AI->getOrdering());
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
Value *AtomicExpand::insertRMWLLSCLoop(
IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
AtomicOrdering MemOpOrder,
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index f8d4e2b9517..ee5ed625f12 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -15,6 +15,7 @@ add_public_tablegen_target(RISCVCommonTableGen)
add_llvm_target(RISCVCodeGen
RISCVAsmPrinter.cpp
+ RISCVExpandPseudoInsts.cpp
RISCVFrameLowering.cpp
RISCVInstrInfo.cpp
RISCVISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 2e4f536aca3..b48a68f76eb 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_RISCV_RISCV_H
#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class RISCVTargetMachine;
@@ -36,6 +37,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
+
+FunctionPass *createRISCVExpandPseudoPass();
+void initializeRISCVExpandPseudoPass(PassRegistry &);
}
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
new file mode 100644
index 00000000000..1c23680abc1
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -0,0 +1,452 @@
+//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVExpandPseudo : public MachineFunctionPass {
+public:
+ const RISCVInstrInfo *TII;
+ static char ID;
+
+ RISCVExpandPseudo() : MachineFunctionPass(ID) {
+ initializeRISCVExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return RISCV_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicBinOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+ bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+
+char RISCVExpandPseudo::ID = 0;
+
+bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case RISCV::PseudoAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicSwap32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadAdd32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadSub32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadNand32:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case RISCV::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
+ }
+
+ return false;
+}
+
+static unsigned getLRForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_W;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_W_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_W_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW32(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_W;
+ case AtomicOrdering::Release:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_W_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_W_AQ_RL;
+ }
+}
+
+static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
+ DebugLoc DL, MachineBasicBlock *ThisMBB,
+ MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB,
+ AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ // .loop:
+ // lr.w dest, (addr)
+ // binop scratch, dest, val
+ // sc.w scratch, scratch, (addr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned DestReg,
+ unsigned OldValReg, unsigned NewValReg,
+ unsigned MaskReg, unsigned ScratchReg) {
+ assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+ assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+ assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+ // We select bits from newval and oldval using:
+ // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+ // r = oldval ^ ((oldval ^ newval) & masktargetdata);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
+ .addReg(OldValReg)
+ .addReg(NewValReg);
+ BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(MaskReg);
+ BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
+ .addReg(OldValReg)
+ .addReg(ScratchReg);
+}
+
+static void doMaskedAtomicBinOpExpansion(
+ const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+ MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+ MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned IncrReg = MI.getOperand(3).getReg();
+ unsigned MaskReg = MI.getOperand(4).getReg();
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
+
+ // .loop:
+ // lr.w destreg, (alignedaddr)
+ // binop scratch, destreg, incr
+ // xor scratch, destreg, scratch
+ // and scratch, scratch, masktargetdata
+ // xor scratch, destreg, scratch
+ // sc.w scratch, scratch, (alignedaddr)
+ // bnez scratch, loop
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(RISCV::X0)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Add:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Sub:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ break;
+ case AtomicRMWInst::Nand:
+ BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(IncrReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-1);
+ break;
+ }
+
+ insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+ ScratchReg);
+
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ .addReg(AddrReg)
+ .addReg(ScratchReg);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(ScratchReg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopMBB);
+}
+
+bool RISCVExpandPseudo::expandAtomicBinOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ MachineFunction *MF = MBB.getParent();
+ auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopMBB);
+ MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopMBB);
+
+ if (!IsMasked)
+ doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
+ else
+ doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+ Width);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, unsigned ValReg,
+ unsigned ShamtReg) {
+ BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
+bool RISCVExpandPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Scratch1Reg = MI.getOperand(1).getReg();
+ unsigned Scratch2Reg = MI.getOperand(2).getReg();
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned IncrReg = MI.getOperand(4).getReg();
+ unsigned MaskReg = MI.getOperand(5).getReg();
+ bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
+
+ //
+ // .loophead:
+ // lr.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // mv scratch1, destreg
+ // [sext scratch2 if signed min/max]
+ // ifnochangeneeded scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ .addReg(AddrReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
+ .addReg(DestReg)
+ .addImm(0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Max: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::Min: {
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+ // sc.w scratch1, scratch1, (addr)
+ // bnez scratch1, loop
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
+ .addReg(AddrReg)
+ .addReg(Scratch1Reg);
+ BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+ .addReg(Scratch1Reg)
+ .addReg(RISCV::X0)
+ .addMBB(LoopHeadMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
+ RISCV_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVExpandPseudoPass() { return new RISCVExpandPseudo(); }
+
+} // end of namespace llvm
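The insertMaskedMerge helper in the new pass relies on the masked-merge bit trick cited in its comment. As a standalone illustration (hypothetical LLVM IR, not part of this patch), the same three-instruction XOR/AND/XOR sequence looks like this:

```llvm
; Select bits from %newval where %mask is 1 and from %oldval where it is 0,
; without materialising the inverted mask:
;   r = oldval ^ ((oldval ^ newval) & mask)
define i32 @masked_merge(i32 %oldval, i32 %newval, i32 %mask) {
  %t0 = xor i32 %oldval, %newval
  %t1 = and i32 %t0, %mask
  %r = xor i32 %oldval, %t1
  ret i32 %r
}
```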
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6f2e6b40038..44da49c23d5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -137,10 +137,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- if (Subtarget.hasStdExtA())
+ if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
- else
+ setMinCmpXchgSizeInBits(32);
+ } else {
setMaxAtomicSizeInBitsSupported(0);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -160,6 +162,33 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ default:
+ return false;
+ case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
+ case Intrinsic::riscv_masked_atomicrmw_add_i32:
+ case Intrinsic::riscv_masked_atomicrmw_sub_i32:
+ case Intrinsic::riscv_masked_atomicrmw_nand_i32:
+ case Intrinsic::riscv_masked_atomicrmw_max_i32:
+ case Intrinsic::riscv_masked_atomicrmw_min_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umax_i32:
+ case Intrinsic::riscv_masked_atomicrmw_umin_i32:
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 4;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+}
+
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
@@ -1596,3 +1625,63 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
+
+TargetLowering::AtomicExpansionKind
+RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
+}
+
+Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+ Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *LrwOpScwLoop = Intrinsic::getDeclaration(
+ AI->getModule(),
+ getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt = Builder.CreateSub(
+ Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
+ return Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ }
+
+ return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+}
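As a worked example of the SextShamt computation above (hypothetical, not from this patch's tests): an i8 value that lands at bit offset 16 of the aligned word on RV32 has ShiftAmt = 16 and ValWidth = 8, so SextShamt = 32 - 8 - 16 = 8. The expansion then sign-extends the field in place using the SLL/SRA pair emitted by insertSext, equivalent to:

```llvm
; Sign-extend the 8-bit field held in bits [23:16] of %word while leaving it in
; its shifted position: move the field's sign bit up to bit 31, then shift back
; arithmetically so it is replicated into bits [31:24]. This mirrors the
; SLL/SRA pair emitted by insertSext with SextShamt = 8.
define i32 @sext_field_in_place(i32 %word) {
  %up = shl i32 %word, 8
  %down = ashr i32 %up, 8
  ret i32 %down
}
```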
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3e3e67b9188..e21951140b3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -43,6 +43,9 @@ public:
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ MachineFunction &MF,
+ unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
@@ -115,6 +118,12 @@ private:
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ virtual Value *emitMaskedAtomicRMWIntrinsic(
+ IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+ Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
};
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 720dd78c4ac..dd739f03f9b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -603,7 +603,7 @@ def : MnemonicAlias<"sbreak", "ebreak">;
/// Generic pattern classes
-class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
: Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index ef46892b9ff..bf8c835918a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -105,4 +105,129 @@ defm : LdPat<atomic_load_32, LW>;
defm : AtomicStPat<atomic_store_8, SB, GPR>;
defm : AtomicStPat<atomic_store_16, SH, GPR>;
defm : AtomicStPat<atomic_store_32, SW, GPR>;
+
+/// AMOs
+
+multiclass AMOPat<string AtomicOp, string BaseInst> {
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
+ !cast<RVInst>(BaseInst)>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
+ !cast<RVInst>(BaseInst#"_AQ")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
+ !cast<RVInst>(BaseInst#"_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
+ !cast<RVInst>(BaseInst#"_AQ_RL")>;
+}
+
+defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
+defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
+defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
+defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
+defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
+defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
+defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
+defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
+defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
+
+def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
+ (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// Pseudo AMOs
+
+class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
+class PseudoMaskedAMO
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$sextshamt,
+ i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOUMinUMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
+
+class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering),
+ (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ imm:$ordering)>;
+
+def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
+ PseudoMaskedAtomicSwap32>;
+def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
+ PseudoMaskedAtomicLoadAdd32>;
+def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
+ PseudoMaskedAtomicLoadSub32>;
+def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
+ PseudoMaskedAtomicLoadNand32>;
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
} // Predicates = [HasStdExtA]
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index a2ebf5bf3e6..e75da768ee2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,6 +27,8 @@ using namespace llvm;
extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+ auto PR = PassRegistry::getPassRegistry();
+ initializeRISCVExpandPseudoPass(*PR);
}
static std::string computeDataLayout(const Triple &TT) {
@@ -78,6 +80,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
+ void addPreEmitPass2() override;
void addPreRegAlloc() override;
};
}
@@ -99,6 +102,13 @@ bool RISCVPassConfig::addInstSelector() {
void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass2() {
+ // Schedule the expansion of AMOs at the last possible moment, avoiding the
+ // possibility for other passes to break the requirements for forward
+ // progress in the LR/SC block.
+ addPass(createRISCVExpandPseudoPass());
+}
+
void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}