diff options
Diffstat (limited to 'llvm/lib/Target/Mips/MipsISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/Mips/MipsISelLowering.cpp | 605 |
1 file changed, 276 insertions, 329 deletions
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 825dd95fbcf..6d34764089f 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1280,76 +1280,76 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, default: llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: - return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_ADD_I16: - return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_ADD_I32: - return emitAtomicBinary(MI, BB, 4, Mips::ADDu); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_ADD_I64: - return emitAtomicBinary(MI, BB, 8, Mips::DADDu); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_AND_I8: - return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_AND_I16: - return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_AND_I32: - return emitAtomicBinary(MI, BB, 4, Mips::AND); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_AND_I64: - return emitAtomicBinary(MI, BB, 8, Mips::AND64); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_OR_I8: - return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_OR_I16: - return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_OR_I32: - return emitAtomicBinary(MI, BB, 4, Mips::OR); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_OR_I64: - return emitAtomicBinary(MI, BB, 8, Mips::OR64); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_XOR_I8: - return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); + return 
emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_XOR_I16: - return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_XOR_I32: - return emitAtomicBinary(MI, BB, 4, Mips::XOR); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_XOR_I64: - return emitAtomicBinary(MI, BB, 8, Mips::XOR64); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_NAND_I8: - return emitAtomicBinaryPartword(MI, BB, 1, 0, true); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_NAND_I16: - return emitAtomicBinaryPartword(MI, BB, 2, 0, true); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_NAND_I32: - return emitAtomicBinary(MI, BB, 4, 0, true); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_NAND_I64: - return emitAtomicBinary(MI, BB, 8, 0, true); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_SUB_I8: - return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_SUB_I16: - return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_SUB_I32: - return emitAtomicBinary(MI, BB, 4, Mips::SUBu); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_SUB_I64: - return emitAtomicBinary(MI, BB, 8, Mips::DSUBu); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_SWAP_I8: - return emitAtomicBinaryPartword(MI, BB, 1, 0); + return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_SWAP_I16: - return emitAtomicBinaryPartword(MI, BB, 2, 0); + return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_SWAP_I32: - return emitAtomicBinary(MI, BB, 4, 0); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_SWAP_I64: - return emitAtomicBinary(MI, BB, 8, 0); + return emitAtomicBinary(MI, BB); case Mips::ATOMIC_CMP_SWAP_I8: return emitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: return 
emitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, BB, 4); + return emitAtomicCmpSwap(MI, BB); case Mips::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, BB, 8); + return emitAtomicCmpSwap(MI, BB); case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: @@ -1398,99 +1398,121 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and // Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true) -MachineBasicBlock *MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode, - bool Nand) const { - assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary."); +MachineBasicBlock * +MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - const bool ArePtrs64bit = ABI.ArePtrs64bit(); DebugLoc DL = MI.getDebugLoc(); - unsigned LL, SC, AND, NOR, ZERO, BEQ; - if (Size == 4) { - if (isMicroMips) { - LL = Subtarget.hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; - SC = Subtarget.hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; - } else { - LL = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) - : (ArePtrs64bit ? Mips::LL64 : Mips::LL); - SC = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) - : (ArePtrs64bit ? Mips::SC64 : Mips::SC); - } - - AND = Mips::AND; - NOR = Mips::NOR; - ZERO = Mips::ZERO; - BEQ = Mips::BEQ; - } else { - LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; - SC = Subtarget.hasMips64r6() ? 
Mips::SCD_R6 : Mips::SCD; - AND = Mips::AND64; - NOR = Mips::NOR64; - ZERO = Mips::ZERO_64; - BEQ = Mips::BEQ64; + unsigned AtomicOp; + switch (MI.getOpcode()) { + case Mips::ATOMIC_LOAD_ADD_I32: + AtomicOp = Mips::ATOMIC_LOAD_ADD_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_SUB_I32: + AtomicOp = Mips::ATOMIC_LOAD_SUB_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_AND_I32: + AtomicOp = Mips::ATOMIC_LOAD_AND_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_OR_I32: + AtomicOp = Mips::ATOMIC_LOAD_OR_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_XOR_I32: + AtomicOp = Mips::ATOMIC_LOAD_XOR_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_NAND_I32: + AtomicOp = Mips::ATOMIC_LOAD_NAND_I32_POSTRA; + break; + case Mips::ATOMIC_SWAP_I32: + AtomicOp = Mips::ATOMIC_SWAP_I32_POSTRA; + break; + case Mips::ATOMIC_LOAD_ADD_I64: + AtomicOp = Mips::ATOMIC_LOAD_ADD_I64_POSTRA; + break; + case Mips::ATOMIC_LOAD_SUB_I64: + AtomicOp = Mips::ATOMIC_LOAD_SUB_I64_POSTRA; + break; + case Mips::ATOMIC_LOAD_AND_I64: + AtomicOp = Mips::ATOMIC_LOAD_AND_I64_POSTRA; + break; + case Mips::ATOMIC_LOAD_OR_I64: + AtomicOp = Mips::ATOMIC_LOAD_OR_I64_POSTRA; + break; + case Mips::ATOMIC_LOAD_XOR_I64: + AtomicOp = Mips::ATOMIC_LOAD_XOR_I64_POSTRA; + break; + case Mips::ATOMIC_LOAD_NAND_I64: + AtomicOp = Mips::ATOMIC_LOAD_NAND_I64_POSTRA; + break; + case Mips::ATOMIC_SWAP_I64: + AtomicOp = Mips::ATOMIC_SWAP_I64_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); } unsigned OldVal = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned Incr = MI.getOperand(2).getReg(); + unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); + + MachineBasicBlock::iterator II(MI); + + // The scratch registers here with the EarlyClobber | Define | Implicit + // flags is used to persuade the register allocator and the machine + // verifier to accept the usage of this register. 
This has to be a real + // register which has an UNDEF value but is dead after the instruction which + // is unique among the registers chosen for the instruction. + + // The EarlyClobber flag has the semantic properties that the operand it is + // attached to is clobbered before the rest of the inputs are read. Hence it + // must be unique among the operands to the instruction. + // The Define flag is needed to coerce the machine verifier that an Undef + // value isn't a problem. + // The Dead flag is needed as the value in scratch isn't used by any other + // instruction. Kill isn't used as Dead is more precise. + // The implicit flag is here due to the interaction between the other flags + // and the machine verifier. + + // For correctness purposes, a new pseudo is introduced here. We need this + // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence + // that is spread over >1 basic blocks. A register allocator which + // introduces (or any codegen in fact) a store, can violate the expectations + // of the hardware. + // + // An atomic read-modify-write sequence starts with a linked load + // instruction and ends with a store conditional instruction. The atomic + // read-modify-write sequence fails if any of the following conditions + // occur between the execution of ll and sc: + // * A coherent store is completed by another process or coherent I/O + // module into the block of synchronizable physical memory containing + // the word. The size and alignment of the block is + // implementation-dependent. + // * A coherent store is executed between an LL and SC sequence on the + // same processor to the block of synchronizable physical memory + // containing the word. 
+ // - unsigned StoreVal = RegInfo.createVirtualRegister(RC); - unsigned AndRes = RegInfo.createVirtualRegister(RC); - unsigned Success = RegInfo.createVirtualRegister(RC); + unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); + unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); - // insert new blocks after the current block - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... 
- // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - loopMBB->addSuccessor(loopMBB); - loopMBB->addSuccessor(exitMBB); - - // loopMBB: - // ll oldval, 0(ptr) - // <binop> storeval, oldval, incr - // sc success, storeval, 0(ptr) - // beq success, $0, loopMBB - BB = loopMBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); - if (Nand) { - // and andres, oldval, incr - // nor storeval, $0, andres - BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); - BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); - } else if (BinOpcode) { - // <binop> storeval, oldval, incr - BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); - } else { - StoreVal = Incr; - } - BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0); - BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(OldVal, RegState::Define | RegState::EarlyClobber) + .addReg(PtrCopy) + .addReg(IncrCopy) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); - MI.eraseFromParent(); // The instruction is gone now. 
+ MI.eraseFromParent(); - return exitMBB; + return BB; } MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( @@ -1524,8 +1546,7 @@ MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( } MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( - MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, - bool Nand) const { + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicBinaryPartial."); @@ -1546,39 +1567,66 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned NewVal = RegInfo.createVirtualRegister(RC); - unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned Incr2 = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); - unsigned AndRes = RegInfo.createVirtualRegister(RC); - unsigned BinOpRes = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); - unsigned StoreVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); - unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned Success = RegInfo.createVirtualRegister(RC); - - unsigned LL, SC; - if (isMicroMips) { - LL = Subtarget.hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; - SC = Subtarget.hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; - } else { - LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) - : (ArePtrs64bit ? Mips::LL64 : Mips::LL); - SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) - : (ArePtrs64bit ? 
Mips::SC64 : Mips::SC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + unsigned Scratch3 = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case Mips::ATOMIC_LOAD_NAND_I8: + AtomicOp = Mips::ATOMIC_LOAD_NAND_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_NAND_I16: + AtomicOp = Mips::ATOMIC_LOAD_NAND_I16_POSTRA; + break; + case Mips::ATOMIC_SWAP_I8: + AtomicOp = Mips::ATOMIC_SWAP_I8_POSTRA; + break; + case Mips::ATOMIC_SWAP_I16: + AtomicOp = Mips::ATOMIC_SWAP_I16_POSTRA; + break; + case Mips::ATOMIC_LOAD_ADD_I8: + AtomicOp = Mips::ATOMIC_LOAD_ADD_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_ADD_I16: + AtomicOp = Mips::ATOMIC_LOAD_ADD_I16_POSTRA; + break; + case Mips::ATOMIC_LOAD_SUB_I8: + AtomicOp = Mips::ATOMIC_LOAD_SUB_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_SUB_I16: + AtomicOp = Mips::ATOMIC_LOAD_SUB_I16_POSTRA; + break; + case Mips::ATOMIC_LOAD_AND_I8: + AtomicOp = Mips::ATOMIC_LOAD_AND_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_AND_I16: + AtomicOp = Mips::ATOMIC_LOAD_AND_I16_POSTRA; + break; + case Mips::ATOMIC_LOAD_OR_I8: + AtomicOp = Mips::ATOMIC_LOAD_OR_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_OR_I16: + AtomicOp = Mips::ATOMIC_LOAD_OR_I16_POSTRA; + break; + case Mips::ATOMIC_LOAD_XOR_I8: + AtomicOp = Mips::ATOMIC_LOAD_XOR_I8_POSTRA; + break; + case Mips::ATOMIC_LOAD_XOR_I16: + AtomicOp = Mips::ATOMIC_LOAD_XOR_I16_POSTRA; + break; + default: + llvm_unreachable("Unknown subword atomic pseudo for expansion!"); } // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loopMBB); - MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // 
Transfer the remainder of BB and its successor edges to exitMBB. @@ -1586,10 +1634,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(loopMBB); - loopMBB->addSuccessor(loopMBB); - loopMBB->addSuccessor(sinkMBB); - sinkMBB->addSuccessor(exitMBB); + BB->addSuccessor(exitMBB, BranchProbability::getOne()); // thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc @@ -1623,159 +1668,92 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(Incr).addReg(ShiftAmt); - // atomic.load.binop - // loopMBB: - // ll oldval,0(alignedaddr) - // binop binopres,oldval,incr2 - // and newval,binopres,mask - // and maskedoldval0,oldval,mask2 - // or storeval,maskedoldval0,newval - // sc success,storeval,0(alignedaddr) - // beq success,$0,loopMBB - - // atomic.swap - // loopMBB: - // ll oldval,0(alignedaddr) - // and newval,incr2,mask - // and maskedoldval0,oldval,mask2 - // or storeval,maskedoldval0,newval - // sc success,storeval,0(alignedaddr) - // beq success,$0,loopMBB - - BB = loopMBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); - if (Nand) { - // and andres, oldval, incr2 - // nor binopres, $0, andres - // and newval, binopres, mask - BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); - BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes) - .addReg(Mips::ZERO).addReg(AndRes); - BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); - } else if (BinOpcode) { - // <binop> binopres, oldval, incr2 - // and newval, binopres, mask - BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); - BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); - } else { // atomic.swap - // and newval, incr2, mask - BuildMI(BB, 
DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); - } - - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) - .addReg(OldVal).addReg(Mask2); - BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) - .addReg(MaskedOldVal0).addReg(NewVal); - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); - // sinkMBB: - // and maskedoldval1,oldval,mask - // srl srlres,maskedoldval1,shiftamt - // sign_extend dest,srlres - BB = sinkMBB; - - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) - .addReg(OldVal).addReg(Mask); - BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(MaskedOldVal1).addReg(ShiftAmt); - BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); + // The purposes of the flags on the scratch registers is explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) + .addReg(Incr2) + .addReg(Mask) + .addReg(Mask2) + .addReg(ShiftAmt) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } -MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, - MachineBasicBlock *BB, - unsigned Size) const { - assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); +// Lower atomic compare and swap to a pseudo instruction, taking care to +// define a scratch register for the pseudo instruction's expansion. 
The +// instruction is expanded after the register allocator as to prevent +// the insertion of stores between the linked load and the store conditional. + +MachineBasicBlock * +MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const { + + assert((MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 || + MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I64) && + "Unsupported atomic psseudo for EmitAtomicCmpSwap."); + + const unsigned Size = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8; MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &RegInfo = MF->getRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - const bool ArePtrs64bit = ABI.ArePtrs64bit(); DebugLoc DL = MI.getDebugLoc(); - unsigned LL, SC, ZERO, BNE, BEQ; - - if (Size == 4) { - if (isMicroMips) { - LL = Subtarget.hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; - SC = Subtarget.hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; - } else { - LL = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) - : (ArePtrs64bit ? Mips::LL64 : Mips::LL); - SC = Subtarget.hasMips32r6() - ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) - : (ArePtrs64bit ? Mips::SC64 : Mips::SC); - } - - ZERO = Mips::ZERO; - BNE = Mips::BNE; - BEQ = Mips::BEQ; - } else { - LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; - SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; - ZERO = Mips::ZERO_64; - BNE = Mips::BNE64; - BEQ = Mips::BEQ64; - } + unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 + ? 
Mips::ATOMIC_CMP_SWAP_I32_POSTRA + : Mips::ATOMIC_CMP_SWAP_I64_POSTRA; unsigned Dest = MI.getOperand(0).getReg(); unsigned Ptr = MI.getOperand(1).getReg(); unsigned OldVal = MI.getOperand(2).getReg(); unsigned NewVal = MI.getOperand(3).getReg(); - unsigned Success = RegInfo.createVirtualRegister(RC); + unsigned Scratch = MRI.createVirtualRegister(RC); + MachineBasicBlock::iterator II(MI); - // insert new blocks after the current block - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); + // We need to create copies of the various registers and kill them at the + // atomic pseudo. If the copies are not made, when the atomic is expanded + // after fast register allocation, the spills will end up outside of the + // blocks that their values are defined in, causing livein errors. - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); + unsigned DestCopy = MRI.createVirtualRegister(MRI.getRegClass(Dest)); + unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); + unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); + unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); - // thisMBB: - // ... 
- // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - loop1MBB->addSuccessor(exitMBB); - loop1MBB->addSuccessor(loop2MBB); - loop2MBB->addSuccessor(loop1MBB); - loop2MBB->addSuccessor(exitMBB); - - // loop1MBB: - // ll dest, 0(ptr) - // bne dest, oldval, exitMBB - BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); - BuildMI(BB, DL, TII->get(BNE)) - .addReg(Dest).addReg(OldVal).addMBB(exitMBB); - - // loop2MBB: - // sc success, newval, 0(ptr) - // beq success, $0, loop1MBB - BB = loop2MBB; - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(NewVal).addReg(Ptr).addImm(0); - BuildMI(BB, DL, TII->get(BEQ)) - .addReg(Success).addReg(ZERO).addMBB(loop1MBB); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), DestCopy).addReg(Dest); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal); + BuildMI(*BB, II, DL, TII->get(Mips::COPY), NewValCopy).addReg(NewVal); + + // The purposes of the flags on the scratch registers is explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(PtrCopy, RegState::Kill) + .addReg(OldValCopy, RegState::Kill) + .addReg(NewValCopy, RegState::Kill) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); MI.eraseFromParent(); // The instruction is gone now. 
- return exitMBB; + return BB; } MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( @@ -1802,40 +1780,33 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned OldVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); - unsigned StoreVal = RegInfo.createVirtualRegister(RC); - unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned Success = RegInfo.createVirtualRegister(RC); - unsigned LL, SC; - - if (isMicroMips) { - LL = Subtarget.hasMips32r6() ? Mips::LL_MMR6 : Mips::LL_MM; - SC = Subtarget.hasMips32r6() ? Mips::SC_MMR6 : Mips::SC_MM; - } else { - LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) - : (ArePtrs64bit ? Mips::LL64 : Mips::LL); - SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) - : (ArePtrs64bit ? Mips::SC64 : Mips::SC); - } + unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8 + ? Mips::ATOMIC_CMP_SWAP_I8_POSTRA + : Mips::ATOMIC_CMP_SWAP_I16_POSTRA; + + // The scratch registers here with the EarlyClobber | Define | Dead | Implicit + // flags are used to coerce the register allocator and the machine verifier to + // accept the usage of these registers. + // The EarlyClobber flag has the semantic properties that the operand it is + // attached to is clobbered before the rest of the inputs are read. 
Hence it + // must be unique among the operands to the instruction. + // The Define flag is needed to coerce the machine verifier that an Undef + // value isn't a problem. + // The Dead flag is needed as the value in scratch isn't used by any other + // instruction. Kill isn't used as Dead is more precise. + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch2 = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1843,14 +1814,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(loop1MBB); - loop1MBB->addSuccessor(sinkMBB); - loop1MBB->addSuccessor(loop2MBB); - loop2MBB->addSuccessor(loop1MBB); - loop2MBB->addSuccessor(sinkMBB); - sinkMBB->addSuccessor(exitMBB); + BB->addSuccessor(exitMBB, BranchProbability::getOne()); - // FIXME: computation of newval2 can be moved to loop2MBB. 
// thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc // and alignedaddr,ptr,masklsb2 @@ -1893,40 +1858,22 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); - // loop1MBB: - // ll oldval,0(alginedaddr) - // and maskedoldval0,oldval,mask - // bne maskedoldval0,shiftedcmpval,sinkMBB - BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) - .addReg(OldVal).addReg(Mask); - BuildMI(BB, DL, TII->get(Mips::BNE)) - .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); - - // loop2MBB: - // and maskedoldval1,oldval,mask2 - // or storeval,maskedoldval1,shiftednewval - // sc success,storeval,0(alignedaddr) - // beq success,$0,loop1MBB - BB = loop2MBB; - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) - .addReg(OldVal).addReg(Mask2); - BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) - .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - - // sinkMBB: - // srl srlres,maskedoldval0,shiftamt - // sign_extend dest,srlres - BB = sinkMBB; - - BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(MaskedOldVal0).addReg(ShiftAmt); - BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); + // The purposes of the flags on the scratch registers are explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among the register chosen for the instruction. 
+ + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) + .addReg(Mask) + .addReg(ShiftedCmpVal) + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); MI.eraseFromParent(); // The instruction is gone now. |

