diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 34 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 6 |
13 files changed, 76 insertions, 47 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7dc8fd5f518..37f6efa315f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2779,6 +2779,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE3) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) + NODE_NAME_CASE(PC_ADD_REL_OFFSET) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(INTERP_MOV) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 824732ad699..ecf69f8779c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -297,6 +297,7 @@ enum NodeType : unsigned { INTERP_MOV, INTERP_P1, INTERP_P2, + PC_ADD_REL_OFFSET, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 51114b3ee44..0fd17b41f7e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -70,7 +70,10 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName())); - MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); + const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx); + const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr, + MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + MCOp = MCOperand::createExpr(Expr); break; } case MachineOperand::MO_ExternalSymbol: { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 1e66b3b0934..98843c97b79 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -81,6 +81,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { return 2; case FK_SecRel_4: case FK_Data_4: + case FK_PCRel_4: return 4; case FK_SecRel_8: case FK_Data_8: @@ -105,27 +106,6 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, break; } - case AMDGPU::fixup_si_rodata: { - uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset()); - // We emit constant data at the end of the text section and generate its - // address using the following code sequence: - // s_getpc_b64 s[0:1] - // s_add_u32 s0, s0, $symbol - // s_addc_u32 s1, s1, 0 - // - // s_getpc_b64 returns the address of the s_add_u32 instruction and then - // the fixup replaces $symbol with a literal constant, which is a - // pc-relative offset from the encoding of the $symbol operand to the - // constant data. - // - // What we want here is an offset from the start of the s_add_u32 - // instruction to the constant data, but since the encoding of $symbol - // starts 4 bytes after the start of the add instruction, we end up - // with an offset that is 4 bytes too small. This requires us to - // add 4 to the fixup value before applying it. - *Dst = Value + 4; - break; - } default: { // FIXME: Copied from AArch64 unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); @@ -152,7 +132,6 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = { // name offset bits flags { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -175,13 +154,15 @@ namespace { class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { bool Is64Bit; + bool HasRelocationAddend; public: - ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) : - AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { } + ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) : + AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn), + HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { } MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - return createAMDGPUELFObjectWriter(Is64Bit, OS); + return createAMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend, OS); } }; @@ -191,5 +172,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU) { // Use 64-bit ELF for amdgcn - return new ELFAMDGPUAsmBackend(T, TT.getArch() == Triple::amdgcn); + return new ELFAMDGPUAsmBackend(T, TT); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 4302737396f..3467ae765f3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -18,7 +18,7 @@ namespace { class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { public: - AMDGPUELFObjectWriter(bool Is64Bit); + AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend); protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override { @@ -30,11 +30,18 @@ protected: } // End anonymous namespace -AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit) - : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA, - ELF::EM_AMDGPU, false) { } +AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(Is64Bit, + ELF::ELFOSABI_AMDGPU_HSA, + ELF::EM_AMDGPU, + HasRelocationAddend) { } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) { - MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit); + +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, + bool HasRelocationAddend, + raw_pwrite_stream &OS) { + MCELFObjectTargetWriter *MOTW = + new AMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend); return createELFObjectWriter(MOTW, OS, true); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h index a024f285cfa..20c1adfbc6b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -18,9 +18,6 @@ enum Fixups { /// 16-bit PC relative fixup for SOPP branch instructions. fixup_si_sopp_br = FirstTargetFixupKind, - /// fixup for global addresses with constant initializers - fixup_si_rodata, - // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 5f76860133c..9ab7940812b 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -47,6 +47,7 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit, + bool HasRelocationAddend, raw_pwrite_stream &OS); } // End llvm namespace diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 533a54c1a8b..52787b64f7e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -248,14 +248,13 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, return MRI.getEncodingValue(MO.getReg()); if (MO.isExpr()) { - const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr()); - const MCSymbol &Sym = Expr->getSymbol(); + const MCSymbolRefExpr *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr()); MCFixupKind Kind; - if (Sym.isExternal()) + if (Expr && Expr->getSymbol().isExternal()) Kind = FK_Data_4; else - Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; - Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc())); + Kind = FK_PCRel_4; + Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc())); } // Figure out the operand number, needed for isSrcOperand check diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a2b5722881e..3fd002696c9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1416,6 +1416,40 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, return DAG.getUNDEF(ASC->getValueType(0)); } +SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, + SDValue Op, + SelectionDAG &DAG) const { + GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); + + if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); + + SDLoc DL(GSD); + const GlobalValue *GV = GSD->getGlobal(); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); + + // In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is + // lowered to the following code sequence: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol + // s_addc_u32 s1, s1, 0 + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // a fixup or relocation is emitted to replace $symbol with a literal + // constant, which is a pc-relative offset from the encoding of the $symbol + // operand to the global variable. + // + // What we want here is an offset from the value returned by s_getpc + // (which is the address of the s_add_u32 instruction) to the global + // variable, but since the encoding of $symbol starts 4 bytes after the start + // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too + // small. This requires us to add 4 to the global variable offset in order to + // compute the correct address. + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, + GSD->getOffset() + 4); + return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA); +} + SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const { // We can't use S_MOV_B32 directly, because there is no way to specify m0 as diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index bc85a0e6a37..20e30c079dc 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -23,7 +23,8 @@ namespace llvm { class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL, SDValue Chain, unsigned Offset, bool Signed) const; - + SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, + SelectionDAG &DAG) const override; SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, MVT VT, unsigned Offset) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index af246c07bf7..b33f07e6f22 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -914,7 +914,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { break; } - case AMDGPU::SI_CONSTDATA_PTR: { + case AMDGPU::SI_PC_ADD_REL_OFFSET: { const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index b2ed155f6c9..f0040ce3866 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -137,6 +137,10 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">; def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; +def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", + SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> +>; + //===----------------------------------------------------------------------===// // PatFrags for FLAT instructions //===----------------------------------------------------------------------===// @@ -454,7 +458,7 @@ def sopp_brtarget : Operand<OtherVT> { let ParserMatchClass = SoppBrTarget; } -def const_ga : Operand<iPTR>; +def si_ga : Operand<iPTR>; def InterpSlot : Operand<i32> { let PrintMethod = "printInterpSlot"; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 61aa65c1caa..bc300b74d34 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2107,10 +2107,10 @@ defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>; let Defs = [SCC] in { -def SI_CONSTDATA_PTR : InstSI < +def SI_PC_ADD_REL_OFFSET : InstSI < (outs SReg_64:$dst), - (ins const_ga:$ptr), - "", [(set SReg_64:$dst, (i64 (AMDGPUconstdata_ptr (tglobaladdr:$ptr))))] + (ins si_ga:$ptr), + "", [(set SReg_64:$dst, (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr))))] > { let SALU = 1; } |