Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp     | 31
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp   | 10
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h     |  1
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp       | 22
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp       | 26
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp          |  8
 llvm/lib/Target/AMDGPU/SIInstrInfo.td           |  4
 llvm/lib/Target/AMDGPU/SIInstructions.td        | 10
 llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp |  4
 9 files changed, 102 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 5578251bcfc..bad5670010a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -298,10 +298,37 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
 }
 
 void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
+      OutContext.reportError({},
+                             Twine(GV->getName()) +
+                                 ": unsupported initializer for address space");
+      return;
+    }
+
+    // LDS variables aren't emitted in HSA or PAL yet.
+    const Triple::OSType OS = TM.getTargetTriple().getOS();
+    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+      return;
+
+    MCSymbol *GVSym = getSymbol(GV);
 
-  // Group segment variables aren't emitted in HSA.
-  if (AMDGPU::isGroupSegment(GV))
+    GVSym->redefineIfPossible();
+    if (GVSym->isDefined() || GVSym->isVariable())
+      report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+                         "' is already defined");
+
+    const DataLayout &DL = GV->getParent()->getDataLayout();
+    uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+    unsigned Align = GV->getAlignment();
+    if (!Align)
+      Align = 4;
+
+    EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+    EmitLinkage(GV, GVSym);
+    getTargetStreamer()->emitAMDGPULDS(GVSym, Size, Align);
     return;
+  }
 
   AsmPrinter::EmitGlobalVariable(GV);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1197893120a..d0af336a00b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4357,6 +4357,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+  NODE_NAME_CASE(LDS)
   NODE_NAME_CASE(KILL)
   NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
@@ -4571,6 +4572,15 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     Known.Zero.setHighBits(16);
     break;
   }
+  case AMDGPUISD::LDS: {
+    auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
+    unsigned Align = GA->getGlobal()->getAlignment();
+
+    Known.Zero.setHighBits(16);
+    if (Align)
+      Known.Zero.setLowBits(Log2_32(Align));
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
     switch (IID) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a17f5dae576..9723fc3ec66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -485,6 +485,7 @@ enum NodeType : unsigned {
   INTERP_P1LV_F16,
   INTERP_P2_F16,
   PC_ADD_REL_OFFSET,
+  LDS,
   KILL,
   DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 78e6e39b05a..74ed6f1fed1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -50,7 +50,7 @@ struct FoldCandidate {
     } else if (FoldOp->isFI()) {
       FrameIndexToFold = FoldOp->getIndex();
     } else {
-      assert(FoldOp->isReg());
+      assert(FoldOp->isReg() || FoldOp->isGlobal());
       OpToFold = FoldOp;
     }
   }
@@ -67,6 +67,8 @@ struct FoldCandidate {
     return Kind == MachineOperand::MO_Register;
   }
 
+  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+
   bool isCommuted() const {
     return Commuted;
   }
@@ -230,7 +232,7 @@ static bool updateOperand(FoldCandidate &Fold,
     }
   }
 
-  if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
     MachineBasicBlock *MBB = MI->getParent();
     auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
     if (Liveness != MachineBasicBlock::LQR_Dead)
@@ -277,6 +279,12 @@ static bool updateOperand(FoldCandidate &Fold,
     return true;
   }
 
+  if (Fold.isGlobal()) {
+    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
+                   Fold.OpToFold->getTargetFlags());
+    return true;
+  }
+
   if (Fold.isFI()) {
     Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
     return true;
@@ -368,7 +376,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
   if ((Opc == AMDGPU::V_ADD_I32_e64 ||
        Opc == AMDGPU::V_SUB_I32_e64 ||
        Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
-      (OpToFold->isImm() || OpToFold->isFI())) {
+      (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
     MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
 
     // Verify the other operand is a VGPR, otherwise we would violate the
@@ -483,7 +491,8 @@ void SIFoldOperands::foldOperand(
       return;
     }
 
-    bool FoldingImmLike = OpToFold.isImm() || OpToFold.isFI();
+    bool FoldingImmLike =
+        OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
 
     if (FoldingImmLike && UseMI->isCopy()) {
       unsigned DestReg = UseMI->getOperand(0).getReg();
@@ -884,7 +893,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
   SmallVector<FoldCandidate, 4> FoldList;
   MachineOperand &Dst = MI.getOperand(0);
 
-  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
   if (FoldingImm) {
     unsigned NumLiteralUses = 0;
     MachineOperand *NonInlineUse = nullptr;
@@ -1232,7 +1241,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       }
 
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+      bool FoldingImm =
+          OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
 
       // FIXME: We could also be folding things like TargetIndexes.
       if (!FoldingImm && !OpToFold.isReg())
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 64018fd92ac..25000506f2c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3588,6 +3588,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   }
   case AMDGPU::GET_GROUPSTATICSIZE: {
+    assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+           getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
         .add(MI.getOperand(0))
@@ -4776,7 +4778,10 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                              SelectionDAG &DAG) const {
   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = GSD->getGlobal();
-  if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+  if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+       (!GV->hasExternalLinkage() ||
+        getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+        getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
       GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
       GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -4784,7 +4789,12 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
   SDLoc DL(GSD);
   EVT PtrVT = Op.getValueType();
 
-  // FIXME: Should not make address space based decisions here.
+  if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+                                            SIInstrInfo::MO_ABS32_LO);
+    return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
+  }
+
   if (shouldEmitFixup(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
   else if (shouldEmitPCReloc(GV))
@@ -5773,6 +5783,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
                                       Op->getOperand(1), Op->getOperand(2)), 0);
 
+  case Intrinsic::amdgcn_groupstaticsize: {
+    Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
+    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+      return Op;
+
+    const Module *M = MF.getFunction().getParent();
+    const GlobalValue *GV =
+        M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
+    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
+                                            SIInstrInfo::MO_ABS32_LO);
+    return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+  }
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f977928735a..1923ac24f85 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2703,7 +2703,7 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
   const MCInstrDesc &InstDesc = MI.getDesc();
   const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
 
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
 
   if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
     return true;
@@ -3012,7 +3012,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
 
     switch (Desc.OpInfo[i].OperandType) {
     case MCOI::OPERAND_REGISTER:
-      if (MI.getOperand(i).isImm()) {
+      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
         ErrInfo = "Illegal immediate value for operand.";
         return false;
       }
@@ -3682,7 +3682,7 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
     return isLegalRegOperand(MRI, OpInfo, MO);
 
   // Handle non-register types that are treated like immediates.
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
 
   return true;
 }
@@ -3739,7 +3739,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
   }
 
   // Handle non-register types that are treated like immediates.
-  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+  assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
 
   if (!DefinedRC) {
     // This operand expects an immediate.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1a3e16afce3..cd1c7fdae92 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -205,6 +205,10 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
   SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
 >;
 
+def SIlds : SDNode<"AMDGPUISD::LDS",
+  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
 def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
   SIload_d16,
   [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 630aeeb8777..2cb7e9fa357 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1142,6 +1142,16 @@ def : GCNPat <
   (S_MOV_B32 imm:$imm)
 >;
 
+def : GCNPat <
+  (VGPRImm<(SIlds tglobaladdr:$ga)>),
+  (V_MOV_B32_e32 $ga)
+>;
+
+def : GCNPat <
+  (SIlds tglobaladdr:$ga),
+  (S_MOV_B32 $ga)
+>;
+
 // FIXME: Workaround for ordering issue with peephole optimizer where
 // a register class copy interferes with immediate folding. Should
 // use s_mov_b32, which can be shrunk to s_movk_i32
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c180afdcd45..7ee178149c7 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -95,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
       Src0.setSubReg(0);
       Src0.ChangeToFrameIndex(MovSrc.getIndex());
       ConstantFolded = true;
+    } else if (MovSrc.isGlobal()) {
+      Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                      MovSrc.getTargetFlags());
+      ConstantFolded = true;
     }
 
     if (ConstantFolded) {