diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/PowerPC/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 52 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 30 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 69 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 22 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 25 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 111 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/retaddr2.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/tls-cse.ll | 52 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/tls-store2.ll | 6 |
14 files changed, 287 insertions, 114 deletions
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 972cd525fba..eb3a8719ba3 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -31,6 +31,7 @@ add_llvm_target(PowerPCCodeGen PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp + PPCTLSDynamicCall.cpp PPCVSXCopy.cpp PPCVSXFMAMutate.cpp ) diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 2102afd4efb..f773b91b6c0 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -39,6 +39,7 @@ namespace llvm { FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCTLSDynamicCallPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); @@ -89,12 +90,7 @@ namespace llvm { MO_TOC_LO = 7 << 4, // Symbol for VK_PPC_TLS fixup attached to an ADD instruction - MO_TLS = 8 << 4, - - // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr - // call sequences. 
- MO_TLSLD = 9 << 4, - MO_TLSGD = 10 << 4 + MO_TLS = 8 << 4 }; } // end namespace PPCII diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 55da9133145..4e31b296d99 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); + void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -404,6 +405,39 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); } +/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a +/// call to __tls_get_addr to the current output stream. +void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, + MCSymbolRefExpr::VariantKind VK) { + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + + assert(MI->getOperand(0).isReg() && + ((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) || + (!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) && + "GETtls[ld]ADDR[32] must define GPR3"); + assert(MI->getOperand(1).isReg() && + ((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) || + (!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) && + "GETtls[ld]ADDR[32] must read GPR3"); + + if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && + TM.getRelocationModel() == Reloc::PIC_) + Kind = MCSymbolRefExpr::VK_PLT; + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = getSymbol(GValue); + const MCExpr *SymVar = 
MCSymbolRefExpr::Create(MOSymbol, VK, OutContext); + EmitToStreamer(OutStreamer, + MCInstBuilder(Subtarget.isPPC64() ? + PPC::BL8_NOP_TLS : PPC::BL_TLS) + .addExpr(TlsRef) + .addExpr(SymVar)); +} + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// @@ -807,6 +841,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymGotTlsGD)); return; } + case PPC::GETtlsADDR: + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) + case PPC::GETtlsADDR32: { + // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym> + // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT + EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD); + return; + } case PPC::ADDIStlsldHA: { // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha @@ -844,6 +887,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymGotTlsLD)); return; } + case PPC::GETtlsldADDR: + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> + // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) + case PPC::GETtlsldADDR32: { + // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym> + // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT + EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD); + return; + } case PPC::ADDISdtprelHA: // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index b9aeba13fd8..95143d54c7e 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -355,6 +355,20 @@ static bool hasNonRISpills(const MachineFunction &MF) { return FuncInfo->hasNonRISpills(); } +/// MustSaveLR - Return true if this function requires that we save the LR +/// register onto the stack in the prolog and restore it in the epilog of the +/// function. 
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { + const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); + + // We need a save/restore of LR if there is any def of LR (which is + // defined by calls, including the PIC setup sequence), or if there is + // some use of the LR stack slot (e.g. for builtin_return_address). + // (LR comes in 32 and 64 bit versions.) + MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); + return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, @@ -381,6 +395,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); + unsigned LR = RegInfo->getRARegister(); if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. @@ -388,6 +403,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && !RegInfo->hasBasePointer(MF)) { // No special alignment. // No need for frame if (UpdateMF) @@ -1108,20 +1124,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -/// MustSaveLR - Return true if this function requires that we save the LR -/// register onto the stack in the prolog and restore it in the epilog of the -/// function. 
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { - const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); - - // We need a save/restore of LR if there is any def of LR (which is - // defined by calls, including the PIC setup sequence), or if there is - // some use of the LR stack slot (e.g. for builtin_return_address). - // (LR comes in 32 and 64 bit versions.) - MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); - return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); -} - void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *) const { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b4416148571..15545993a68 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -791,8 +791,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; - case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; @@ -826,8 +824,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; 
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; @@ -1686,27 +1686,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } -// Generate a call to __tls_get_addr for the given GOT entry Op. -std::pair<SDValue,SDValue> -PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const { - - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, IntPtrTy, - DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), - std::move(Args), 0); - - return LowerCallTo(CLI); -} - SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1753,8 +1732,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, } if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSGD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); @@ -1767,15 +1745,13 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTPtr, TGA); - std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); - return CallResult.first; + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, + PtrVT, GOTPtr, TGA); + return DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, GOTEntry, TGA); } if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLSLD); + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); @@ -1790,11 +1766,10 
@@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, GOTPtr, TGA); - std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); - SDValue TLSAddr = CallResult.first; - SDValue Chain = CallResult.second; - SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, TLSAddr, TGA); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, GOTEntry, TGA); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, + PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } @@ -3818,23 +3793,6 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (Callee.getNode()) { Ops.push_back(Chain); Ops.push_back(Callee); - - // If this is a call to __tls_get_addr, find the symbol whose address - // is to be taken and add it to the list. This will be used to - // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). - // We find the symbol by walking the chain to the CopyFromReg, walking - // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and - // pulling the symbol from that node. - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - if (!strcmp(S->getSymbol(), "__tls_get_addr")) { - assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); - SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); - SDValue TGTAddr = AddI->getOperand(1); - assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && - "Didn't find target global TLS address where we expected one"); - Ops.push_back(TGTAddr); - CallOpc = PPCISD::CALL_TLS; - } } // If this is a tail call add stack pointer delta. 
if (isTailCall) @@ -3997,12 +3955,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, Ops.insert(std::next(Ops.begin()), AddTOC); } else if ((CallOpc == PPCISD::CALL) && (!isLocalCall(Callee) || - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; - } else if (CallOpc == PPCISD::CALL_TLS) - // For 64-bit SVR4, TLS calls are always non-local. - CallOpc = PPCISD::CALL_NOP_TLS; } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 21fcc86d151..1691e4ed6df 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -101,10 +101,6 @@ namespace llvm { /// SVR4 calls. CALL, CALL_NOP, - /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used - /// to access TLS variables. - CALL_TLS, CALL_NOP_TLS, - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -228,6 +224,10 @@ namespace llvm { /// sym\@got\@tlsgd\@l. ADDI_TLSGD_L, + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsgd). + GET_TLS_ADDR, + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. @@ -238,11 +238,13 @@ namespace llvm { /// sym\@got\@tlsld\@l. ADDI_TLSLD_L, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the - /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed - /// to tie this in place following a copy to %X3 from the result - /// of a GET_TLSLD_ADDR. + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsld). 
+ GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds X3 to + /// sym\@dtprel\@ha. ADDIS_DTPREL_HA, /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS @@ -635,8 +637,6 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl, - SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 0ba6ecb5a60..71f23d44276 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -202,9 +202,6 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)), def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; -def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -904,6 +901,12 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in +def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDIStlsldHA", [(set i64:$rD, @@ -914,6 +917,12 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), [(set i64:$rD, 
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in +def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), "#ADDISdtprelHA", [(set i64:$rD, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 65a71e4cc5c..b4a64fbf8bb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -110,10 +110,11 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; -def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, - [SDNPHasChain]>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; @@ -136,15 +137,9 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall, - [SDNPHasChain, 
SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, @@ -2459,9 +2454,6 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)), def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; -def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), - (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; @@ -2516,10 +2508,21 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in +def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsADDR32", + [(set i32:$rD, + (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in +def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), + "GETtlsldADDR32", + [(set i32:$rD, + (PPCgetTlsldAddr i32:$reg, + tglobaltlsaddr:$sym))]>; def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index ee0dcafb8ad..819738b2062 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -137,12 +137,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, case PPCII::MO_TLS: RefKind = MCSymbolRefExpr::VK_PPC_TLS; break; - case PPCII::MO_TLSGD: - RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; - break; - case PPCII::MO_TLSLD: - 
RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; - break; } if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin) diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp new file mode 100644 index 00000000000..b0e1dfa52a4 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -0,0 +1,111 @@ +//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass fixes up GETtls[ld]ADDR[32] machine instructions so that +// they read and write GPR3. These are really call instructions, so +// must use the calling convention registers. This is done in a late +// pass so that TLS variable accesses can be fully commoned. +// +//===----------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-tls-dynamic-call" + +namespace llvm { + void initializePPCTLSDynamicCallPass(PassRegistry&); +} + +namespace { + // PPCTLSDynamicCall pass - Add copies to and from GPR3 around + // GETtls[ld]ADDR[32] machine instructions. These instructions + // are actually call instructions, so the register choice is + // constrained. We delay introducing these copies as late as + // possible so that TLS variable accesses can be fully commoned. 
+  /// Machine-function pass that pins the GETtls[ld]ADDR[32] pseudo
+  /// instructions to GPR3.  For every such pseudo it inserts a COPY of
+  /// the input register into GPR3 before it, rewrites operands 0 and 1
+  /// to GPR3, and inserts a COPY from GPR3 into the original result
+  /// register after it.
+  struct PPCTLSDynamicCall : public MachineFunctionPass {
+    static char ID;
+    PPCTLSDynamicCall()
+        : MachineFunctionPass(ID), TM(nullptr), TII(nullptr) {
+      initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Both are (re)assigned at the top of runOnMachineFunction.
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    /// Rewrite every TLS dynamic-call pseudo in \p MBB.
+    /// \returns true if at least one instruction was rewritten.
+    bool processBlock(MachineBasicBlock &MBB) {
+      bool Changed = false;
+      bool Is64Bit = TM->getSubtargetImpl()->isPPC64();
+
+      for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+           I != IE;) {
+        // Step past the current instruction up front.  I then names its
+        // successor, which is both the insertion point for the trailing
+        // COPY and the next instruction to examine, so nothing is
+        // skipped and the iterator never moves beyond end().
+        MachineBasicBlock::iterator Cur = I++;
+        MachineInstr *MI = Cur;
+
+        // Match the 64-bit and the 32-bit pseudos; the assembly printer
+        // asserts GPR3 operands for all four of them.
+        unsigned Opc = MI->getOpcode();
+        if (Opc != PPC::GETtlsADDR && Opc != PPC::GETtlsldADDR &&
+            Opc != PPC::GETtlsADDR32 && Opc != PPC::GETtlsldADDR32)
+          continue;
+
+        DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+
+        unsigned OutReg = MI->getOperand(0).getReg();
+        unsigned InReg = MI->getOperand(1).getReg();
+        DebugLoc DL = MI->getDebugLoc();
+        unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
+
+        // GPR3 = COPY InReg, placed immediately before the pseudo.
+        BuildMI(MBB, Cur, DL, TII->get(TargetOpcode::COPY), GPR3)
+          .addReg(InReg);
+        MI->getOperand(0).setReg(GPR3);
+        MI->getOperand(1).setReg(GPR3);
+        // OutReg = COPY GPR3, placed immediately after the pseudo.
+        BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
+          .addReg(GPR3);
+
+        Changed = true;
+      }
+
+      return Changed;
+    }
+
+public:
+    /// Cache the target machine and instruction info, then process each
+    /// basic block of \p MF.  \returns true if any block was changed.
+    bool runOnMachineFunction(MachineFunction &MF) override {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getSubtargetImpl()->getInstrInfo();
+
+      bool Changed = false;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++;
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
+                      "PowerPC TLS Dynamic Call Fixup", false, false)
+INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
+                    "PowerPC TLS Dynamic Call Fixup", false, false)
+
+char PPCTLSDynamicCall::ID = 0;
+FunctionPass*
+llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 0a3f6e59646..03425c9ca5f 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -259,6 +259,7 @@ void PPCPassConfig::addPreRegAlloc() { initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, &PPCVSXFMAMutateID); + addPass(createPPCTLSDynamicCallPass()); } void PPCPassConfig::addPreSched2() { diff --git a/llvm/test/CodeGen/PowerPC/retaddr2.ll b/llvm/test/CodeGen/PowerPC/retaddr2.ll index 8fa3b4d13b7..8581f6cb9a3 100644 --- a/llvm/test/CodeGen/PowerPC/retaddr2.ll +++ b/llvm/test/CodeGen/PowerPC/retaddr2.ll @@ -12,8 +12,7 @@ entry: ; CHECK-LABEL: @test1 ; CHECK: mflr 0 ; CHECK: std 0, 16(1) -; FIXME: These next two lines don't both need to load the same value. -; CHECK-DAG: ld 3, 16(1) +; CHECK-DAG: ld 3, 64(1) ; CHECK-DAG: ld 0, 16(1) ; CHECK: mtlr 0 ; CHECK: blr diff --git a/llvm/test/CodeGen/PowerPC/tls-cse.ll b/llvm/test/CodeGen/PowerPC/tls-cse.ll new file mode 100644 index 00000000000..2aa75f90782 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/tls-cse.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s +; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1 + +; This test was derived from LLVM's own +; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an +; opportunity for CSE of calls to __tls_get_addr(). 
+ +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* } + +@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8 +@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8 +@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1 +@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1 +@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1 + +declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr +declare void @__cxa_pure_virtual() +declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) +declare void @_ZdlPv(i8*) + +define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 { +entry: + %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8 + %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this + br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i + 
+cond.false.i: ; preds = %entry + tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0)) + unreachable + +_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry + %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1 + %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64* + %3 = load i64* %2, align 8 + store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8 + %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8* + tail call void @_ZdlPv(i8* %4) + ret void +} + +; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev: +; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha +; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l +; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld) +; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha +; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) +; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) diff --git a/llvm/test/CodeGen/PowerPC/tls-store2.ll b/llvm/test/CodeGen/PowerPC/tls-store2.ll index f884dd8a0a1..a9c97b5e23e 100644 --- a/llvm/test/CodeGen/PowerPC/tls-store2.ll +++ b/llvm/test/CodeGen/PowerPC/tls-store2.ll @@ -19,13 +19,11 @@ entry: } ; CHECK-LABEL: call_once: -; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha -; CHECK: addi 3, 3, __once_callable@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_callable@tlsgd) ; CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3) -; CHECK: addis 3, 2, __once_call@got@tlsgd@ha -; CHECK: addi 3, 3, __once_call@got@tlsgd@l +; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; 
CHECK-NEXT: nop ; CHECK: std {{[0-9]+}}, 0(3) |