diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll | 14 |
8 files changed, 94 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7109913a0b6..7884c4c7b19 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3037,6 +3037,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) + NODE_NAME_CASE(BUFFER_LOAD) + NODE_NAME_CASE(BUFFER_LOAD_FORMAT) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index f01afefae55..5cc5efb331e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -325,6 +325,8 @@ enum NodeType : unsigned { ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, + BUFFER_LOAD, + BUFFER_LOAD_FORMAT, LAST_AMDGPU_ISD_NUMBER }; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 928b5d2d5d3..45a7fe6d343 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -759,12 +759,12 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, >; } -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">; -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">; -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">; -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, f32, "BUFFER_LOAD_DWORD">; -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v2f32, "BUFFER_LOAD_DWORDX2">; -defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v4f32, "BUFFER_LOAD_DWORDX4">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">; +defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">; multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 82788ccecdd..978825c1051 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2650,6 +2650,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + SDLoc DL(Op); switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: { @@ -2665,6 +2666,31 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_buffer_load: + case Intrinsic::amdgcn_buffer_load_format: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // offset + Op.getOperand(5), // glc + Op.getOperand(6) // slc + }; + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? + AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; + EVT VT = Op.getValueType(); + EVT IntVT = VT.changeTypeToInteger(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(MFI->getBufferPSV()), + MachineMemOperand::MOLoad, + VT.getStoreSize(), VT.getStoreSize()); + + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO); + } default: return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 0074bca25fc..7fb3f42a8a6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -58,6 +58,19 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", [SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; +def SDTBufferLoad : SDTypeProfile<1, 5, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex + SDTCisVT<3, i32>, // offset + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>]>; // slc + +def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; +def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad, + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; + def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 0b92198f20a..f4c210f1f2c 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -47,6 +47,29 @@ public: } }; +class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { +public: + explicit AMDGPUBufferPseudoSourceValue() : + PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + + bool isConstant(const MachineFrameInfo *) const override { + // This should probably be true for most images, but we will start by being + // conservative. + return false; + } + + bool isAliased(const MachineFrameInfo *) const override { + // FIXME: If we ever change image intrinsics to accept fat pointers, then + // this could be true for some cases. + return false; + } + + bool mayAlias(const MachineFrameInfo*) const override { + // FIXME: If we ever change image intrinsics to accept fat pointers, then + // this could be true for some cases. + return false; + } +}; /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. @@ -98,6 +121,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // Stack object indices for work item IDs. std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; + AMDGPUBufferPseudoSourceValue BufferPSV; std::unique_ptr<AMDGPUImagePseudoSourceValue> ImagePSV; public: @@ -462,6 +486,10 @@ public: llvm_unreachable("unexpected dimension"); } + const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { + return &BufferPSV; + } + AMDGPUImagePseudoSourceValue *getImagePSV() { return ImagePSV.get(); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll index 67c7baba3e1..9cb9f25520b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll @@ -32,10 +32,10 @@ main_body: ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen ;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095 ;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff -;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093 +;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen -;VI: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff -;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1 +;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff +;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll index 010ad276da1..5c93ae0e786 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -112,6 +112,20 @@ main_body: ret <4 x float> %data } +; SI won't merge ds memory operations, because of the signed offset bug, so +; we only have check lines for VI. +; CHECK-LABEL: buffer_load_mmo: +; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 +define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) { +entry: + store float 0.0, float addrspace(3)* %lds + %val = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) + %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 + store float 0.0, float addrspace(3)* %tmp2 + ret float %val +} + declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0 declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #0 declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0 |