diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 169 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 19 |
4 files changed, 176 insertions, 25 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index cc08fe68327..bd570c86b72 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -304,7 +304,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; @@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; @@ -336,7 +337,8 @@ class AMDGPUImageSample<bit NoMem = 0> : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; @@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 50ee88fa635..9c96fb8737d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } + + // Image load. + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: + + // Sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(1)); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(2)); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + Info.align = 0; + return true; + } + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(2)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_image_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(3)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } default: return false; } @@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); if (TII->isMIMG(MI)) { - if (!MI.memoperands_empty()) - return BB; + if (MI.memoperands_empty() && MI.mayLoadOrStore()) { + report_fatal_error("missing mem operand from MIMG instruction"); + } // Add a memoperand for mimg instructions so that they aren't assumed to // be ordered memory instuctions. - MachinePointerInfo PtrInfo(MFI->getImagePSV()); - MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; - if (MI.mayStore()) - Flags |= MachineMemOperand::MOStore; - - if (MI.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - - if (Flags != MachineMemOperand::MODereferenceable) { - auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); - MI.addMemOperand(*MF, MMO); - } - return BB; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6013ebc81d9..68acfa4c6ea 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,7 +29,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 5dde72910ee..7ac755be04d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -34,12 +34,14 @@ namespace llvm { class MachineFrameInfo; class MachineFunction; +class SIInstrInfo; class TargetRegisterClass; class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: + // TODO: Is the img rsrc useful? explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : - PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; - AMDGPUImagePseudoSourceValue ImagePSV; + + DenseMap<const Value *, + std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs; + private: unsigned LDSWaveSpillSize = 0; @@ -629,12 +634,18 @@ public: return LDSWaveSpillSize; } + // FIXME: These should be unique const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } - const AMDGPUImagePseudoSourceValue *getImagePSV() const { - return &ImagePSV; + const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, + const Value *ImgRsrc) { + assert(ImgRsrc); + auto PSV = ImagePSVs.try_emplace( + ImgRsrc, + llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII)); + return PSV.first->second.get(); } }; |