diff options
author | Simon Dardis <simon.dardis@imgtec.com> | 2017-02-24 16:32:18 +0000 |
---|---|---|
committer | Simon Dardis <simon.dardis@imgtec.com> | 2017-02-24 16:32:18 +0000 |
commit | ae6f2bcb258472b3ed89b24b8fa6989395c059e3 (patch) | |
tree | aa70d33c1f2346b99231e5eca9cdcc417ee9d0d5 /llvm/lib/Target/Mips/MipsISelLowering.cpp | |
parent | 3c58c18ff0f9670f5d4d493263f2cd7dc84258c0 (diff) | |
download | bcm5719-llvm-ae6f2bcb258472b3ed89b24b8fa6989395c059e3.tar.gz bcm5719-llvm-ae6f2bcb258472b3ed89b24b8fa6989395c059e3.zip |
Recommit "[mips] Fix atomic compare and swap at O0."
This time with the missing files.
Similar to PR/25526, fast-regalloc introduces spills at the end of basic
blocks. When this occurs in between an ll and sc, the store can cause the
atomic sequence to fail.
This patch fixes the issue by introducing more pseudos to represent atomic
operations and moving their lowering to after the expansion of postRA
pseudos.
This resolves PR/32020.
Thanks to James Cowgill for reporting the issue!
Reviewers: slthakur
Differential Revision: https://reviews.llvm.org/D30257
llvm-svn: 296134
Diffstat (limited to 'llvm/lib/Target/Mips/MipsISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/Mips/MipsISelLowering.cpp | 180 |
1 files changed, 32 insertions, 148 deletions
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index f0f2424f722..1e72f4e47ec 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1053,14 +1053,11 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case Mips::ATOMIC_SWAP_I64: return emitAtomicBinary(MI, BB, 8, 0); - case Mips::ATOMIC_CMP_SWAP_I8: + case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO: return emitAtomicCmpSwapPartword(MI, BB, 1); - case Mips::ATOMIC_CMP_SWAP_I16: + case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO: return emitAtomicCmpSwapPartword(MI, BB, 2); - case Mips::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, BB, 4); - case Mips::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, BB, 8); + case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: @@ -1407,96 +1404,6 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( return exitMBB; } -MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, - MachineBasicBlock *BB, - unsigned Size) const { - assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); - - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - const bool ArePtrs64bit = ABI.ArePtrs64bit(); - DebugLoc DL = MI.getDebugLoc(); - unsigned LL, SC, ZERO, BNE, BEQ; - - if (Size == 4) { - if (isMicroMips) { - LL = Mips::LL_MM; - SC = Mips::SC_MM; - } else { - LL = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) - : (ArePtrs64bit ? Mips::LL64 : Mips::LL); - SC = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) - : (ArePtrs64bit ? Mips::SC64 : Mips::SC); - } - - ZERO = Mips::ZERO; - BNE = Mips::BNE; - BEQ = Mips::BEQ; - } else { - LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; - SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; - ZERO = Mips::ZERO_64; - BNE = Mips::BNE64; - BEQ = Mips::BEQ64; - } - - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned OldVal = MI.getOperand(2).getReg(); - unsigned NewVal = MI.getOperand(3).getReg(); - - unsigned Success = RegInfo.createVirtualRegister(RC); - - // insert new blocks after the current block - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - loop1MBB->addSuccessor(exitMBB); - loop1MBB->addSuccessor(loop2MBB); - loop2MBB->addSuccessor(loop1MBB); - loop2MBB->addSuccessor(exitMBB); - - // loop1MBB: - // ll dest, 0(ptr) - // bne dest, oldval, exitMBB - BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); - BuildMI(BB, DL, TII->get(BNE)) - .addReg(Dest).addReg(OldVal).addMBB(exitMBB); - - // loop2MBB: - // sc success, newval, 0(ptr) - // beq success, $0, loop1MBB - BB = loop2MBB; - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(NewVal).addReg(Ptr).addImm(0); - BuildMI(BB, DL, TII->get(BEQ)) - .addReg(Success).addReg(ZERO).addMBB(loop1MBB); - - MI.eraseFromParent(); // The instruction is gone now. - - return exitMBB; -} - MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && @@ -1521,18 +1428,15 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned OldVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); - unsigned StoreVal = RegInfo.createVirtualRegister(RC); - unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned Success = RegInfo.createVirtualRegister(RC); + unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO + ? Mips::ATOMIC_CMP_SWAP_I8_FRAG + : Mips::ATOMIC_CMP_SWAP_I16_FRAG; unsigned LL, SC; if (isMicroMips) { @@ -1547,14 +1451,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1562,12 +1460,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(loop1MBB); - loop1MBB->addSuccessor(sinkMBB); - loop1MBB->addSuccessor(loop2MBB); - loop2MBB->addSuccessor(loop1MBB); - loop2MBB->addSuccessor(sinkMBB); - sinkMBB->addSuccessor(exitMBB); + BB->addSuccessor(exitMBB); // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: @@ -1612,40 +1505,31 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); - // loop1MBB: - // ll oldval,0(alginedaddr) - // and maskedoldval0,oldval,mask - // bne maskedoldval0,shiftedcmpval,sinkMBB - BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) - .addReg(OldVal).addReg(Mask); - BuildMI(BB, DL, TII->get(Mips::BNE)) - .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); - - // loop2MBB: - // and maskedoldval1,oldval,mask2 - // or storeval,maskedoldval1,shiftednewval - // sc success,storeval,0(alignedaddr) - // beq success,$0,loop1MBB - BB = loop2MBB; - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) - .addReg(OldVal).addReg(Mask2); - BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) - .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - - // sinkMBB: - // srl srlres,maskedoldval0,shiftamt - // sign_extend dest,srlres - BB = sinkMBB; - - BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(MaskedOldVal0).addReg(ShiftAmt); - BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); + // For correctness purpose, a new pseudo is introduced here. We need this + // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence + // that is spread over >1 basic blocks. A register allocator which + // introduces (or any codegen infact) a store, can violate the expactations + // of the hardware. + // + // An atomic read-modify-write sequence starts with a linked load + // instruction and ends with a store conditional instruction. The atomic + // read-modify-write sequence failes if any of the following conditions + // occur between the execution of ll and sc: + // * A coherent store is completed by another process or coherent I/O + // module into the block of synchronizable physical memory containing + // the word. The size and alignment of the block is + // implementation-dependent. + // * A coherent store is executed between an LL and SC sequence on the + // same processor to the block of synchornizable physical memory + // containing the word. + // + BuildMI(BB, DL, TII->get(AtomicOp), Dest) + .addReg(AlignedAddr) + .addReg(Mask) + .addReg(ShiftedCmpVal) + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt); MI.eraseFromParent(); // The instruction is gone now. |