Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp      | 31
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp    | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h      |  1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp        | 22
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp        | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp           |  8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td            |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td         | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp  |  4
9 files changed, 102 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 5578251bcfc..bad5670010a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -298,10 +298,37 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
}

void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
-  // Group segment variables aren't emitted in HSA.
-  if (AMDGPU::isGroupSegment(GV))
+  if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+    if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
+      OutContext.reportError({},
+                             Twine(GV->getName()) +
+                                 ": unsupported initializer for address space");
+      return;
+    }
+
+    // LDS variables aren't emitted in HSA or PAL yet.
+    const Triple::OSType OS = TM.getTargetTriple().getOS();
+    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+      return;
+
+    MCSymbol *GVSym = getSymbol(GV);
+    GVSym->redefineIfPossible();
+    if (GVSym->isDefined() || GVSym->isVariable())
+      report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+                         "' is already defined");
+
+    const DataLayout &DL = GV->getParent()->getDataLayout();
+    uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+    unsigned Align = GV->getAlignment();
+    if (!Align)
+      Align = 4;
+
+    EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+    EmitLinkage(GV, GVSym);
+    getTargetStreamer()->emitAMDGPULDS(GVSym, Size, Align);
    return;
+  }

  AsmPrinter::EmitGlobalVariable(GV);
}
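
For readers skimming the hunk above: the new path validates the LDS global, then emits it as a symbol with an explicit size and alignment. A minimal standalone sketch of just the size/alignment step, assuming the LLVM C++ API of this era (the helper name is hypothetical, not part of the patch):

#include <cstdint>

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Mirrors the logic above: take the variable's allocation size from the
// module DataLayout, and default a missing alignment to 4 bytes before
// the symbol is handed to emitAMDGPULDS.
static void computeLDSLayout(const llvm::GlobalVariable &GV,
                             uint64_t &Size, unsigned &Align) {
  const llvm::DataLayout &DL = GV.getParent()->getDataLayout();
  Size = DL.getTypeAllocSize(GV.getValueType());
  Align = GV.getAlignment(); // 0 when the IR carries no explicit 'align'
  if (!Align)
    Align = 4;
}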
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1197893120a..d0af336a00b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4357,6 +4357,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+ NODE_NAME_CASE(LDS)
NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
@@ -4571,6 +4572,15 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
Known.Zero.setHighBits(16);
break;
}
+ case AMDGPUISD::LDS: {
+ auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
+ unsigned Align = GA->getGlobal()->getAlignment();
+
+ Known.Zero.setHighBits(16);
+ if (Align)
+ Known.Zero.setLowBits(Log2_32(Align));
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IID) {
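
The known-bits case added above encodes two facts about an AMDGPUISD::LDS address: it fits in the low 16 bits of the 32-bit pointer, and a power-of-two alignment guarantees that many zero low bits. A hedged restatement as a plain mask computation (illustrative only, not LLVM API):

#include <cstdint>

// Bits known to be zero in an AMDGPUISD::LDS address, assuming a 32-bit
// LDS pointer and a power-of-two (or absent) alignment.
static uint32_t knownZeroLDSBits(uint32_t Align) {
  uint32_t Zero = 0xFFFF0000u; // LDS offsets fit in the low 16 bits
  if (Align)
    Zero |= Align - 1;         // same effect as Known.Zero.setLowBits(Log2_32(Align))
  return Zero;
}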
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a17f5dae576..9723fc3ec66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -485,6 +485,7 @@ enum NodeType : unsigned {
INTERP_P1LV_F16,
INTERP_P2_F16,
PC_ADD_REL_OFFSET,
+ LDS,
KILL,
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 78e6e39b05a..74ed6f1fed1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -50,7 +50,7 @@ struct FoldCandidate {
} else if (FoldOp->isFI()) {
FrameIndexToFold = FoldOp->getIndex();
} else {
- assert(FoldOp->isReg());
+ assert(FoldOp->isReg() || FoldOp->isGlobal());
OpToFold = FoldOp;
}
}
@@ -67,6 +67,8 @@ struct FoldCandidate {
return Kind == MachineOperand::MO_Register;
}
+ bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+
bool isCommuted() const {
return Commuted;
}
@@ -230,7 +232,7 @@ static bool updateOperand(FoldCandidate &Fold,
}
}
- if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+ if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
MachineBasicBlock *MBB = MI->getParent();
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
if (Liveness != MachineBasicBlock::LQR_Dead)
@@ -277,6 +279,12 @@ static bool updateOperand(FoldCandidate &Fold,
return true;
}
+ if (Fold.isGlobal()) {
+    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
+                   Fold.OpToFold->getTargetFlags());
+ return true;
+ }
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -368,7 +376,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
Opc == AMDGPU::V_SUB_I32_e64 ||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
- (OpToFold->isImm() || OpToFold->isFI())) {
+ (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
// Verify the other operand is a VGPR, otherwise we would violate the
@@ -483,7 +491,8 @@ void SIFoldOperands::foldOperand(
return;
}
- bool FoldingImmLike = OpToFold.isImm() || OpToFold.isFI();
+  bool FoldingImmLike =
+      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
if (FoldingImmLike && UseMI->isCopy()) {
unsigned DestReg = UseMI->getOperand(0).getReg();
@@ -884,7 +893,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
SmallVector<FoldCandidate, 4> FoldList;
MachineOperand &Dst = MI.getOperand(0);
- bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
if (FoldingImm) {
unsigned NumLiteralUses = 0;
MachineOperand *NonInlineUse = nullptr;
@@ -1232,7 +1241,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
}
MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+      bool FoldingImm =
+          OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
// FIXME: We could also be folding things like TargetIndexes.
if (!FoldingImm && !OpToFold.isReg())
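
The common thread of these hunks is that a global-address operand is now treated as "immediate-like" everywhere literals and frame indexes already were. The recurring predicate, written out once as a sketch (isImm/isFI/isGlobal are real MachineOperand queries; the helper name is illustrative):

#include "llvm/CodeGen/MachineOperand.h"

// Operands that can be folded into users the way a literal constant is;
// this patch extends the set to global addresses.
static bool isFoldableImmLike(const llvm::MachineOperand &MO) {
  return MO.isImm() || MO.isFI() || MO.isGlobal();
}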
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 64018fd92ac..25000506f2c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3588,6 +3588,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
case AMDGPU::GET_GROUPSTATICSIZE: {
+ assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.add(MI.getOperand(0))
@@ -4776,7 +4778,10 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
- if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ (!GV->hasExternalLinkage() ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -4784,7 +4789,12 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
- // FIXME: Should not make address space based decisions here.
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+ SIInstrInfo::MO_ABS32_LO);
+ return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
+ }
+
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
@@ -5773,6 +5783,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
Op->getOperand(1), Op->getOperand(2)), 0);
+ case Intrinsic::amdgcn_groupstaticsize: {
+ Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
+ if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+ return Op;
+
+ const Module *M = MF.getFunction().getParent();
+ const GlobalValue *GV =
+ M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
+ SIInstrInfo::MO_ABS32_LO);
+ return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
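
The intrinsic lowering above splits on the target OS: AMDHSA and AMDPAL keep GET_GROUPSTATICSIZE for the asm printer to resolve to a constant, while other OSes (e.g. Mesa) materialize an S_MOV_B32 of an absolute symbol that is fixed up at link time. A sketch of that dispatch, assuming llvm::Triple (the helper name is hypothetical):

#include "llvm/ADT/Triple.h"

// True when the toolchain itself knows the group segment size, so the
// GET_GROUPSTATICSIZE pseudo can survive to the asm printer; otherwise
// the size is deferred to a link-time absolute symbol.
static bool groupStaticSizeIsCompilerConstant(const llvm::Triple &TT) {
  return TT.getOS() == llvm::Triple::AMDHSA ||
         TT.getOS() == llvm::Triple::AMDPAL;
}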
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f977928735a..1923ac24f85 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2703,7 +2703,7 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
- assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
return true;
@@ -3012,7 +3012,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
- if (MI.getOperand(i).isImm()) {
+ if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -3682,7 +3682,7 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
return isLegalRegOperand(MRI, OpInfo, MO);
// Handle non-register types that are treated like immediates.
- assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
return true;
}
@@ -3739,7 +3739,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
// Handle non-register types that are treated like immediates.
- assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+ assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
if (!DefinedRC) {
// This operand expects an immediate.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1a3e16afce3..cd1c7fdae92 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -205,6 +205,10 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
+def SIlds : SDNode<"AMDGPUISD::LDS",
+  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
SIload_d16,
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 630aeeb8777..2cb7e9fa357 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1142,6 +1142,16 @@ def : GCNPat <
(S_MOV_B32 imm:$imm)
>;
+def : GCNPat <
+  (VGPRImm<(SIlds tglobaladdr:$ga)>),
+  (V_MOV_B32_e32 $ga)
+>;
+
+def : GCNPat <
+  (SIlds tglobaladdr:$ga),
+  (S_MOV_B32 $ga)
+>;
+
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c180afdcd45..7ee178149c7 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -95,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
Src0.setSubReg(0);
Src0.ChangeToFrameIndex(MovSrc.getIndex());
ConstantFolded = true;
+ } else if (MovSrc.isGlobal()) {
+    Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                    MovSrc.getTargetFlags());
+ ConstantFolded = true;
}
if (ConstantFolded) {
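
The shrink-pass change mirrors the fold-operands one: a mov whose source is a global address can be folded into its user via MachineOperand::ChangeToGA, leaving the mov dead. A hedged sketch of that rewrite (the wrapper name is illustrative):

#include "llvm/CodeGen/MachineOperand.h"

// Replace a register use with the mov's global-address source so the mov
// itself can be deleted as dead.
static void foldGlobalAddressIntoUse(llvm::MachineOperand &Use,
                                     const llvm::MachineOperand &MovSrc) {
  Use.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
                 MovSrc.getTargetFlags());
}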