diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/gv-offset-folding.ll | 21 |
3 files changed, 31 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 60fe8c8bf54..3142e4c8f42 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1422,6 +1422,14 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, return DAG.getUNDEF(ASC->getValueType(0)); } +bool +SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) + return false; + + return TargetLowering::isOffsetFoldingLegal(GA); +} + SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 2f013198970..032372b7b17 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -105,6 +105,8 @@ public: bool isTypeDesirableForOp(unsigned Op, EVT VT) const override; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/test/CodeGen/AMDGPU/gv-offset-folding.ll b/llvm/test/CodeGen/AMDGPU/gv-offset-folding.ll new file mode 100644 index 00000000000..c75fdb35dd0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gv-offset-folding.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -relocation-model=static < %s | FileCheck %s + +@lds = external addrspace(3) global [4 x i32] + +; Function Attrs: nounwind + +; Offset folding is an optimization done for global variables with relocations, +; which allows you to store the offset in the r_addend of the relocation entry. +; The offset is applied to the variable's address at link time, which eliminates +; the need to emit shader instructions to do this calculation. 
+; We don't use relocations for local memory, so we should never fold offsets +; for local memory globals. + +; CHECK-LABEL: lds_no_offset: +; CHECK: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:4 +define void @lds_no_offset() { +entry: + %ptr = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1 + store i32 0, i32 addrspace(3)* %ptr + ret void +} |