diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 |
4 files changed, 94 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 1bc6be45056..2cdd691fc10 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -106,12 +106,13 @@ private: MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; - SDNode *glueCopyToM0(SDNode *N) const; + SDNode *glueCopyToM0LDSInit(SDNode *N) const; + SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); - bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, + bool isDSOffsetLegal(SDValue Base, unsigned Offset, unsigned OffsetBits) const; bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, @@ -209,6 +210,7 @@ private: void SelectBRCOND(SDNode *N); void SelectFMAD_FMA(SDNode *N); void SelectATOMIC_CMP_SWAP(SDNode *N); + void SelectINTRINSIC_W_CHAIN(SDNode *N); protected: // Include the pieces autogenerated from the target description. @@ -339,29 +341,32 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } } -SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { - if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || - !Subtarget->ldsRequiresM0Init()) - return N; - +SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { const SITargetLowering& Lowering = - *static_cast<const SITargetLowering*>(getTargetLowering()); + *static_cast<const SITargetLowering*>(getTargetLowering()); // Write max value to m0 before each load operation SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), - CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); + Val); SDValue Glue = M0.getValue(1); SmallVector <SDValue, 8> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - Ops.push_back(N->getOperand(i)); - } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + Ops.push_back(Glue); return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); } +SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { + if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || + !Subtarget->ldsRequiresM0Init()) + return N; + return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); +} + MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, EVT VT) const { SDNode *Lo = CurDAG->getMachineNode( @@ -472,7 +477,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { Opc == ISD::ATOMIC_LOAD_FADD || Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) - N = glueCopyToM0(N); + N = glueCopyToM0LDSInit(N); switch (Opc) { default: @@ -570,7 +575,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: { - N = glueCopyToM0(N); + N = glueCopyToM0LDSInit(N); break; } @@ -648,6 +653,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectCode(N); return; } + + break; + } + case ISD::INTRINSIC_W_CHAIN: { + SelectINTRINSIC_W_CHAIN(N); + return; } } @@ -828,7 +839,7 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } -bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, +bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || (OffsetBits == 8 && !isUInt<8>(Offset))) @@ -1760,6 +1771,52 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } +void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { + unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if ((IntrID != Intrinsic::amdgcn_ds_append && + IntrID != Intrinsic::amdgcn_ds_consume) || + N->getValueType(0) != MVT::i32) { + SelectCode(N); + return; + } + + // The address is assumed to be uniform, so if it ends up in a VGPR, it will + // be copied to an SGPR with readfirstlane. + unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? + AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; + + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(2); + MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); + bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; + + SDValue Offset; + if (CurDAG->isBaseWithConstantOffset(Ptr)) { + SDValue PtrBase = Ptr.getOperand(0); + SDValue PtrOffset = Ptr.getOperand(1); + + const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); + if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) { + N = glueCopyToM0(N, PtrBase); + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); + } + } + + if (!Offset) { + N = glueCopyToM0(N, Ptr); + Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); + } + + SDValue Ops[] = { + Offset, + CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32), + Chain, + N->getOperand(N->getNumOperands() - 1) // New glue + }; + + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); +} + bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const { Mods = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index e973bb9b0a2..aaad8b6e48a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -109,7 +109,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { // modes on SI to know the high bits are 0 so pointer adds don't wrap. We // can't represent this with range metadata because it's only allowed for // integer types. - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) && ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) continue; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0c1fca88235..b57b2d2fd20 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -926,7 +926,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } + case Intrinsic::amdgcn_ds_append: + case Intrinsic::amdgcn_ds_consume: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = CI.getOperand(0); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(1)); + if (!Vol || !Vol->isZero()) + Info.flags |= MachineMemOperand::MOVolatile; + + return true; + } default: return false; } @@ -1978,7 +1991,8 @@ SDValue SITargetLowering::LowerFormalArguments( auto *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS && - ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. On CI and newer they could potentially be // real pointers, so we can't guarantee their size. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 88bab949a81..45687d4486e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -275,6 +275,11 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt, if (OffsetImm) { // Normal, single offset LDS instruction. BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr); + // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to + // report that here? + if (!BaseOp) + return false; + Offset = OffsetImm->getImm(); assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base " "operands of type register."); |

