diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-12-29 17:18:21 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-12-29 17:18:21 +0000 |
commit | e19bc2ee0fb20f05a9acf53b9e18f59c3b130960 (patch) | |
tree | 19a3f8114b1bd5a29a3782cb48394d5606be2bf8 | |
parent | d94b63d7659c9c327dbfe63ba9e1d0a4447cb253 (diff) | |
download | bcm5719-llvm-e19bc2ee0fb20f05a9acf53b9e18f59c3b130960.tar.gz bcm5719-llvm-e19bc2ee0fb20f05a9acf53b9e18f59c3b130960.zip |
AMDGPU: Use unique PSVs for buffer resources
Also fixes using the wrong memory type for some
intrinsics when custom lowering them.
llvm-svn: 321557
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 110 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 15 |
4 files changed, 93 insertions, 45 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index bd570c86b72..22a3a0fe618 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -462,7 +462,7 @@ class AMDGPUBufferLoad : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem]>; + [IntrReadMem], "", [SDNPMemOperand]>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; @@ -474,7 +474,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem]>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -489,7 +489,7 @@ def int_amdgcn_tbuffer_load : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - []>; + [IntrReadMem], "", [SDNPMemOperand]>; def int_amdgcn_tbuffer_store : Intrinsic < [], @@ -503,7 +503,7 @@ def int_amdgcn_tbuffer_store : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; class AMDGPUBufferAtomic : Intrinsic < [llvm_i32_ty], @@ -512,7 +512,7 @@ class AMDGPUBufferAtomic : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic; @@ -531,7 +531,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; // Uses that do not set the done bit should set IntrWriteMem on the // call site. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9c96fb8737d..415d8a512aa 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -726,6 +726,70 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.flags |= MachineMemOperand::MOVolatile; return true; } + case Intrinsic::amdgcn_tbuffer_load: + case Intrinsic::amdgcn_buffer_load: + case Intrinsic::amdgcn_buffer_load_format: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(0)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + + // There is a constant offset component, but there are additional register + // offsets which could break AA if we set the offset to anything non-0. + return true; + } + case Intrinsic::amdgcn_tbuffer_store: + case Intrinsic::amdgcn_buffer_store: + case Intrinsic::amdgcn_buffer_store_format: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_VOID; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(1)); + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(1)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_buffer_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(2)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; + return true; + } default: return false; } @@ -4396,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); SDLoc DL(Op); - MachineFunction &MF = DAG.getMachineFunction(); switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: @@ -4423,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // glc Op.getOperand(6) // slc }; - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(MFI->getBufferPSV()), - MachineMemOperand::MOLoad, - VT.getStoreSize(), VT.getStoreSize()); - - return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO); + auto *M = cast<MemSDNode>(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, + M->getMemOperand()); } case Intrinsic::amdgcn_tbuffer_load: { + MemSDNode *M = cast<MemSDNode>(Op); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // rsrc @@ -4451,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(10) // slc }; - EVT VT = Op.getOperand(2).getValueType(); + EVT VT = Op.getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad, - VT.getStoreSize(), VT.getStoreSize()); return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, - Op->getVTList(), Ops, VT, MMO); + Op->getVTList(), Ops, VT, M->getMemOperand()); } case Intrinsic::amdgcn_buffer_atomic_swap: case Intrinsic::amdgcn_buffer_atomic_add: @@ -4478,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // offset Op.getOperand(6) // slc }; - EVT VT = Op.getOperand(3).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile, - VT.getStoreSize(), 4); + EVT VT = Op.getValueType(); + + auto *M = cast<MemSDNode>(Op); unsigned Opcode = 0; switch (IntrID) { @@ -4523,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, llvm_unreachable("unhandled atomic opcode"); } - return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); + return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, + M->getMemOperand()); } case Intrinsic::amdgcn_buffer_atomic_cmpswap: { @@ -4536,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // offset Op.getOperand(7) // slc }; - EVT VT = Op.getOperand(4).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile, - VT.getStoreSize(), 4); + EVT VT = Op.getValueType(); + auto *M = cast<MemSDNode>(Op); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, - Op->getVTList(), Ops, VT, MMO); + Op->getVTList(), Ops, VT, M->getMemOperand()); } // Basic sample. diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 68acfa4c6ea..888d8f978af 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -28,7 +28,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - BufferPSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7ac755be04d..02e63f0258e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -137,12 +137,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // Stack object indices for work item IDs. std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; - AMDGPUBufferPseudoSourceValue BufferPSV; - + DenseMap<const Value *, + std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs; DenseMap<const Value *, std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; - private: unsigned LDSWaveSpillSize = 0; unsigned NumUserSGPRs = 0; @@ -634,9 +633,13 @@ public: return LDSWaveSpillSize; } - // FIXME: These should be unique - const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { - return &BufferPSV; + const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, + const Value *BufferRsrc) { + assert(BufferRsrc); + auto PSV = BufferPSVs.try_emplace( + BufferRsrc, + llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII)); + return PSV.first->second.get(); } const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, |