summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-29 17:18:14 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-29 17:18:14 +0000
commit905f3518ba2adff6aee5271780359abbd5bcfae9 (patch)
treefcd5284336ffa17d1d44ef6c6dcb8e71f679e710 /llvm/lib
parent5a5e1caf25e6869694b3f3c78990a4eddfe3b620 (diff)
downloadbcm5719-llvm-905f3518ba2adff6aee5271780359abbd5bcfae9.tar.gz
bcm5719-llvm-905f3518ba2adff6aee5271780359abbd5bcfae9.zip
AMDGPU: Implement getTgtMemIntrinsic for images
Currently all images are lowered to have a single image PseudoSourceValue. Image stores happen to have overly strict mayLoad/mayStore/hasSideEffects flags set on them, so this happens to work. When these are fixed to be correct, the scheduler breaks this because the identical PSVs are assumed to be the same address. These need to be unique to the image resource value. llvm-svn: 321555
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp169
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h19
3 files changed, 169 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 50ee88fa635..9c96fb8737d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+
+ // Image load.
+ case Intrinsic::amdgcn_image_load:
+ case Intrinsic::amdgcn_image_load_mip:
+
+ // Sample.
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ // Sample with comparison.
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ // Sample with offsets.
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ // Sample with comparison and offsets.
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ // Basic gather4
+ case Intrinsic::amdgcn_image_gather4:
+ case Intrinsic::amdgcn_image_gather4_cl:
+ case Intrinsic::amdgcn_image_gather4_l:
+ case Intrinsic::amdgcn_image_gather4_b:
+ case Intrinsic::amdgcn_image_gather4_b_cl:
+ case Intrinsic::amdgcn_image_gather4_lz:
+
+ // Gather4 with comparison
+ case Intrinsic::amdgcn_image_gather4_c:
+ case Intrinsic::amdgcn_image_gather4_c_cl:
+ case Intrinsic::amdgcn_image_gather4_c_l:
+ case Intrinsic::amdgcn_image_gather4_c_b:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl:
+ case Intrinsic::amdgcn_image_gather4_c_lz:
+
+ // Gather4 with offsets
+ case Intrinsic::amdgcn_image_gather4_o:
+ case Intrinsic::amdgcn_image_gather4_cl_o:
+ case Intrinsic::amdgcn_image_gather4_l_o:
+ case Intrinsic::amdgcn_image_gather4_b_o:
+ case Intrinsic::amdgcn_image_gather4_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_lz_o:
+
+ // Gather4 with comparison and offsets
+ case Intrinsic::amdgcn_image_gather4_c_o:
+ case Intrinsic::amdgcn_image_gather4_c_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_l_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_lz_o: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_store:
+ case Intrinsic::amdgcn_image_store_mip: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+ Info.flags = MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+ Info.align = 0;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_swap:
+ case Intrinsic::amdgcn_image_atomic_add:
+ case Intrinsic::amdgcn_image_atomic_sub:
+ case Intrinsic::amdgcn_image_atomic_smin:
+ case Intrinsic::amdgcn_image_atomic_umin:
+ case Intrinsic::amdgcn_image_atomic_smax:
+ case Intrinsic::amdgcn_image_atomic_umax:
+ case Intrinsic::amdgcn_image_atomic_and:
+ case Intrinsic::amdgcn_image_atomic_or:
+ case Intrinsic::amdgcn_image_atomic_xor:
+ case Intrinsic::amdgcn_image_atomic_inc:
+ case Intrinsic::amdgcn_image_atomic_dec: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_cmpswap: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(3));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
default:
return false;
}
@@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isMIMG(MI)) {
- if (!MI.memoperands_empty())
- return BB;
+ if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ report_fatal_error("missing mem operand from MIMG instruction");
+ }
// Add a memoperand for mimg instructions so that they aren't assumed to
// be ordered memory instuctions.
- MachinePointerInfo PtrInfo(MFI->getImagePSV());
- MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
- if (MI.mayStore())
- Flags |= MachineMemOperand::MOStore;
-
- if (MI.mayLoad())
- Flags |= MachineMemOperand::MOLoad;
-
- if (Flags != MachineMemOperand::MODereferenceable) {
- auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
- MI.addMemOperand(*MF, MMO);
- }
-
return BB;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6013ebc81d9..68acfa4c6ea 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -29,7 +29,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
- ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 5dde72910ee..7ac755be04d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -34,12 +34,14 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
+class SIInstrInfo;
class TargetRegisterClass;
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
+ // TODO: Is the img rsrc useful?
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
@@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV;
- AMDGPUImagePseudoSourceValue ImagePSV;
+
+ DenseMap<const Value *,
+ std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+
private:
unsigned LDSWaveSpillSize = 0;
@@ -629,12 +634,18 @@ public:
return LDSWaveSpillSize;
}
+ // FIXME: These should be unique
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
}
- const AMDGPUImagePseudoSourceValue *getImagePSV() const {
- return &ImagePSV;
+ const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
+ const Value *ImgRsrc) {
+ assert(ImgRsrc);
+ auto PSV = ImagePSVs.try_emplace(
+ ImgRsrc,
+ llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+ return PSV.first->second.get();
}
};
OpenPOWER on IntegriCloud