diff options
author | Simon Dardis <simon.dardis@imgtec.com> | 2017-02-24 16:30:27 +0000 |
---|---|---|
committer | Simon Dardis <simon.dardis@imgtec.com> | 2017-02-24 16:30:27 +0000 |
commit | 3c58c18ff0f9670f5d4d493263f2cd7dc84258c0 (patch) | |
tree | 1cc470e6f44a51362ec959e97759ca2dbe857d69 /llvm/lib/Target/Mips/MipsISelLowering.cpp | |
parent | cf0e06d375d8524314950805cddf189eb775a84c (diff) | |
download | bcm5719-llvm-3c58c18ff0f9670f5d4d493263f2cd7dc84258c0.tar.gz bcm5719-llvm-3c58c18ff0f9670f5d4d493263f2cd7dc84258c0.zip |
Revert "[mips] Fix atomic compare and swap at O0."
This reverts r296132. I forgot to include the tests.
llvm-svn: 296133
Diffstat (limited to 'llvm/lib/Target/Mips/MipsISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/Mips/MipsISelLowering.cpp | 180 |
1 files changed, 148 insertions, 32 deletions
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 1e72f4e47ec..f0f2424f722 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1053,11 +1053,14 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case Mips::ATOMIC_SWAP_I64: return emitAtomicBinary(MI, BB, 8, 0); - case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO: + case Mips::ATOMIC_CMP_SWAP_I8: return emitAtomicCmpSwapPartword(MI, BB, 1); - case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO: + case Mips::ATOMIC_CMP_SWAP_I16: return emitAtomicCmpSwapPartword(MI, BB, 2); - + case Mips::ATOMIC_CMP_SWAP_I32: + return emitAtomicCmpSwap(MI, BB, 4); + case Mips::ATOMIC_CMP_SWAP_I64: + return emitAtomicCmpSwap(MI, BB, 8); case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: @@ -1404,6 +1407,96 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( return exitMBB; } +MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const { + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const bool ArePtrs64bit = ABI.ArePtrs64bit(); + DebugLoc DL = MI.getDebugLoc(); + unsigned LL, SC, ZERO, BNE, BEQ; + + if (Size == 4) { + if (isMicroMips) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = Subtarget.hasMips32r6() + ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) + : (ArePtrs64bit ? Mips::LL64 : Mips::LL); + SC = Subtarget.hasMips32r6() + ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) + : (ArePtrs64bit ? Mips::SC64 : Mips::SC); + } + + ZERO = Mips::ZERO; + BNE = Mips::BNE; + BEQ = Mips::BEQ; + } else { + LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; + SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; + ZERO = Mips::ZERO_64; + BNE = Mips::BNE64; + BEQ = Mips::BEQ64; + } + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + + unsigned Success = RegInfo.createVirtualRegister(RC); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB->getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); + + // loop1MBB: + // ll dest, 0(ptr) + // bne dest, oldval, exitMBB + BB = loop1MBB; + BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(BB, DL, TII->get(BNE)) + .addReg(Dest).addReg(OldVal).addMBB(exitMBB); + + // loop2MBB: + // sc success, newval, 0(ptr) + // beq success, $0, loop1MBB + BB = loop2MBB; + BuildMI(BB, DL, TII->get(SC), Success) + .addReg(NewVal).addReg(Ptr).addImm(0); + BuildMI(BB, DL, TII->get(BEQ)) + .addReg(Success).addReg(ZERO).addMBB(loop1MBB); + + MI.eraseFromParent(); // The instruction is gone now. + + return exitMBB; +} + MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && @@ -1428,15 +1521,18 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); - unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO - ? Mips::ATOMIC_CMP_SWAP_I8_FRAG - : Mips::ATOMIC_CMP_SWAP_I16_FRAG; + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); unsigned LL, SC; if (isMicroMips) { @@ -1451,8 +1547,14 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1460,7 +1562,12 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(exitMBB); + BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: @@ -1505,31 +1612,40 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); - // For correctness purpose, a new pseudo is introduced here. We need this - // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence - // that is spread over >1 basic blocks. A register allocator which - // introduces (or any codegen infact) a store, can violate the expactations - // of the hardware. - // - // An atomic read-modify-write sequence starts with a linked load - // instruction and ends with a store conditional instruction. The atomic - // read-modify-write sequence failes if any of the following conditions - // occur between the execution of ll and sc: - // * A coherent store is completed by another process or coherent I/O - // module into the block of synchronizable physical memory containing - // the word. The size and alignment of the block is - // implementation-dependent. - // * A coherent store is executed between an LL and SC sequence on the - // same processor to the block of synchornizable physical memory - // containing the word. - // - BuildMI(BB, DL, TII->get(AtomicOp), Dest) - .addReg(AlignedAddr) - .addReg(Mask) - .addReg(ShiftedCmpVal) - .addReg(Mask2) - .addReg(ShiftedNewVal) - .addReg(ShiftAmt); + // loop1MBB: + // ll oldval,0(alginedaddr) + // and maskedoldval0,oldval,mask + // bne maskedoldval0,shiftedcmpval,sinkMBB + BB = loop1MBB; + BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask); + BuildMI(BB, DL, TII->get(Mips::BNE)) + .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); + + // loop2MBB: + // and maskedoldval1,oldval,mask2 + // or storeval,maskedoldval1,shiftednewval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loop1MBB + BB = loop2MBB; + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal1).addReg(ShiftedNewVal); + BuildMI(BB, DL, TII->get(SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(Mips::BEQ)) + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); + + // sinkMBB: + // srl srlres,maskedoldval0,shiftamt + // sign_extend dest,srlres + BB = sinkMBB; + + BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) + .addReg(MaskedOldVal0).addReg(ShiftAmt); + BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); MI.eraseFromParent(); // The instruction is gone now. |