diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-12-29 17:18:14 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-12-29 17:18:14 +0000 |
commit | 905f3518ba2adff6aee5271780359abbd5bcfae9 (patch) | |
tree | fcd5284336ffa17d1d44ef6c6dcb8e71f679e710 /llvm/lib | |
parent | 5a5e1caf25e6869694b3f3c78990a4eddfe3b620 (diff) | |
download | bcm5719-llvm-905f3518ba2adff6aee5271780359abbd5bcfae9.tar.gz bcm5719-llvm-905f3518ba2adff6aee5271780359abbd5bcfae9.zip |
AMDGPU: Implement getTgtMemIntrinsic for images
Currently all images are lowered to have a single
image PseudoSourceValue. Image stores happen to have
overly strict mayLoad/mayStore/hasSideEffects flags
set on them, so this happens to work. When these
are fixed to be correct, the scheduler breaks
this because the identical PSVs are assumed to
be the same address. These need to be unique
to the image resource value.
llvm-svn: 321555
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 169 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 19 |
3 files changed, 169 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 50ee88fa635..9c96fb8737d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } + + // Image load. + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: + + // Sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(1)); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(2)); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + Info.align = 0; + return true; + } + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(2)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_image_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(3)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } default: return false; } @@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); if (TII->isMIMG(MI)) { - if (!MI.memoperands_empty()) - return BB; + if (MI.memoperands_empty() && MI.mayLoadOrStore()) { + report_fatal_error("missing mem operand from MIMG instruction"); + } // Add a memoperand for mimg instructions so that they aren't assumed to // be ordered memory instuctions. - MachinePointerInfo PtrInfo(MFI->getImagePSV()); - MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; - if (MI.mayStore()) - Flags |= MachineMemOperand::MOStore; - - if (MI.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - - if (Flags != MachineMemOperand::MODereferenceable) { - auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); - MI.addMemOperand(*MF, MMO); - } - return BB; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6013ebc81d9..68acfa4c6ea 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,7 +29,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 5dde72910ee..7ac755be04d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -34,12 +34,14 @@ namespace llvm { class MachineFrameInfo; class MachineFunction; +class SIInstrInfo; class TargetRegisterClass; class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: + // TODO: Is the img rsrc useful? explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : - PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; - AMDGPUImagePseudoSourceValue ImagePSV; + + DenseMap<const Value *, + std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; + private: unsigned LDSWaveSpillSize = 0; @@ -629,12 +634,18 @@ public: return LDSWaveSpillSize; } + // FIXME: These should be unique const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } - const AMDGPUImagePseudoSourceValue *getImagePSV() const { - return &ImagePSV; + const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, + const Value *ImgRsrc) { + assert(ImgRsrc); + auto PSV = ImagePSVs.try_emplace( + ImgRsrc, + llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); + return PSV.first->second.get(); } }; |