| | | |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-10-08 10:04:41 -0700 |
| committer | Matt Arsenault <arsenm2@gmail.com> | 2019-10-25 13:11:09 -0700 |
| commit | 171cf5302f43776b07615e32b2ffd6ddf4e5d890 (patch) | |
| tree | 1f029fd31e417dba705e7fbfd15469fbb8f8a683 /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | |
| parent | 1ce552f3ef8d6455c10a9886191c1898594975e0 (diff) | |
AMDGPU/GlobalISel: Handle flat/global G_ATOMIC_CMPXCHG
Custom lower this to a target instruction that takes the new and compare
values merged into a single vector operand. I think it might be better to
select this directly and emit a REG_SEQUENCE, but that would be more work,
since it would require splitting the tablegen patterns for these cases from
the other atomics.
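For a 32-bit cmpxchg on a global pointer, the custom action packs the new and compare values into a vector operand and swaps the opcode. A rough MIR sketch of the before/after (register names and the memory-operand spelling are illustrative, not taken from the patch's tests):

```
; Before legalization:
%old:_(s32) = G_ATOMIC_CMPXCHG %ptr(p1), %cmp(s32), %newval(s32) :: (load store seq_cst 4, addrspace 1)

; After the custom lowering:
%packed:_(<2 x s32>) = G_BUILD_VECTOR %newval(s32), %cmp(s32)
%old:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG %ptr(p1), %packed(<2 x s32>) :: (load store seq_cst 4, addrspace 1)
```

The new value lands in the low element and the compare value in the high element of the packed operand, matching the operand order the buildBuildVector call in the diff below uses.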
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 39 |
1 file changed, 38 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 200946f2c7d..f780d43475d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -848,7 +848,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
        G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
        G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
-       G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
+       G_ATOMICRMW_UMIN})
     .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
                {S64, GlobalPtr}, {S64, LocalPtr}});
   if (ST.hasFlatAddressSpace()) {
@@ -858,6 +858,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
     .legalFor({{S32, LocalPtr}});

+  // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
+  // demarshalling
+  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+    .customFor({{S32, GlobalPtr}, {S64, GlobalPtr},
+                {S32, FlatPtr}, {S64, FlatPtr}})
+    .legalFor({{S32, LocalPtr}, {S64, LocalPtr},
+               {S32, RegionPtr}, {S64, RegionPtr}});
+
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
     .lower();

@@ -1116,6 +1124,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
     return legalizeFMad(MI, MRI, B);
   case TargetOpcode::G_FDIV:
     return legalizeFDIV(MI, MRI, B);
+  case TargetOpcode::G_ATOMIC_CMPXCHG:
+    return legalizeAtomicCmpXChg(MI, MRI, B);
   default:
     return false;
   }
@@ -1724,6 +1734,33 @@ bool AMDGPULegalizerInfo::legalizeFMad(
   return Helper.lowerFMad(MI) == LegalizerHelper::Legalized;
 }

+bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register PtrReg = MI.getOperand(1).getReg();
+  Register CmpVal = MI.getOperand(2).getReg();
+  Register NewVal = MI.getOperand(3).getReg();
+
+  assert(SITargetLowering::isFlatGlobalAddrSpace(
+           MRI.getType(PtrReg).getAddressSpace()) &&
+         "this should not have been custom lowered");
+
+  LLT ValTy = MRI.getType(CmpVal);
+  LLT VecTy = LLT::vector(2, ValTy);
+
+  B.setInstr(MI);
+  Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0);
+
+  B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG)
+    .addDef(DstReg)
+    .addUse(PtrReg)
+    .addUse(PackedVal)
+    .setMemRefs(MI.memoperands());
+
+  MI.eraseFromParent();
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
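The customFor list also routes the 64-bit and flat-pointer forms through the same legalizeAtomicCmpXChg hook. Under the same assumptions as the sketch above, the flat s64 case would come out along the lines of:

```
%packed:_(<2 x s64>) = G_BUILD_VECTOR %newval(s64), %cmp(s64)
%old:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG %ptr(p0), %packed(<2 x s64>) :: (load store seq_cst 8)
```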