diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-03-18 16:24:20 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-03-18 16:24:20 +0000 |
commit | 3003ba00a3260bdee71dd802bcfa970c3580e6bb (patch) | |
tree | 2ee90f91725b6c097dbf0def5e621966580566e2 /llvm/lib/Target | |
parent | a74cd526e9b273f1ca99793e62dfb6d1396bf6b3 (diff) | |
download | bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.tar.gz bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.zip |
AMDGPU: use ComplexPattern for offsets in llvm.amdgcn.buffer.load/store.format
Summary:
We cannot easily deduce that an offset is in an SGPR, but the Mesa frontend
cannot easily make use of an explicit soffset parameter either. Furthermore,
it is likely that in the future, LLVM will be in a better position than the
frontend to choose an SGPR offset if possible.
Since there aren't any frontend uses of these intrinsics in upstream
repositories yet, I would like to take this opportunity to change the
intrinsic signatures to a single offset parameter, which is then selected
to immediate offsets or voffsets using a ComplexPattern.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D18218
llvm-svn: 263790
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 79 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 42 |
3 files changed, 110 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index fb350db9325..18f15288a9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -127,6 +127,13 @@ private: SDValue &TFE) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &GLC) const; + void SelectMUBUFConstant(SDValue Constant, + SDValue &SOffset, + SDValue &ImmOffset) const; + bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset, + SDValue &ImmOffset) const; + bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, + SDValue &ImmOffset, SDValue &VOffset) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, @@ -1112,6 +1119,78 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); } +void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, + SDValue &SOffset, + SDValue &ImmOffset) const { + SDLoc DL(Constant); + uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); + uint32_t Overflow = 0; + + if (Imm >= 4096) { + if (Imm <= 4095 + 64) { + // Use an SOffset inline constant for 1..64 + Overflow = Imm - 4095; + Imm = 4095; + } else { + // Try to keep the same value in SOffset for adjacent loads, so that + // the corresponding register contents can be re-used. + // + // Load values with all low-bits set into SOffset, so that a larger + // range of values can be covered using s_movk_i32 + uint32_t High = (Imm + 1) & ~4095; + uint32_t Low = (Imm + 1) & 4095; + Imm = Low; + Overflow = High - 1; + } + } + + ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); + + if (Overflow <= 64) + SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); + else + SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, + CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), + 0); +} + +bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, + SDValue &SOffset, + SDValue &ImmOffset) const { + SDLoc DL(Offset); + + if (!isa<ConstantSDNode>(Offset)) + return false; + + SelectMUBUFConstant(Offset, SOffset, ImmOffset); + + return true; +} + +bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, + SDValue &SOffset, + SDValue &ImmOffset, + SDValue &VOffset) const { + SDLoc DL(Offset); + + // Don't generate an unnecessary voffset for constant offsets. + if (isa<ConstantSDNode>(Offset)) + return false; + + if (CurDAG->isBaseWithConstantOffset(Offset)) { + SDValue N0 = Offset.getOperand(0); + SDValue N1 = Offset.getOperand(1); + SelectMUBUFConstant(N1, SOffset, ImmOffset); + VOffset = N0; + } else { + SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); + ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); + VOffset = Offset; + } + + return true; +} + /// /// \param EncodedOffset This is the immediate value that will be encoded /// directly into the instruction. On SI/CI the \p EncodedOffset diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 2a1ce1267b4..cdaa662dac2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -722,6 +722,8 @@ def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">; def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">; def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; +def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">; +def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">; def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">; def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 4d9c942dbd6..902cb8bcb6f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2107,28 +2107,38 @@ def : Pat < // buffer_load/store_format patterns //===----------------------------------------------------------------------===// def : Pat< - (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, 0, + (int_amdgcn_buffer_load_format v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, + i16:$offset), imm:$glc, imm:$slc), (BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, 0, + (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, + i16:$offset), imm:$glc, imm:$slc), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, i32:$voffset, + (int_amdgcn_buffer_load_format v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, + i16:$offset, + i32:$voffset), imm:$glc, imm:$slc), (BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, i32:$voffset, + (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, + i16:$offset, + i32:$voffset), imm:$glc, imm:$slc), (BUFFER_LOAD_FORMAT_XYZW_BOTHEN (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -2137,32 +2147,38 @@ def : Pat< >; def : Pat< - (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, - i32:$soffset, imm:$offset, 0, 0, + (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, + i16:$offset), imm:$glc, imm:$slc), (BUFFER_STORE_FORMAT_XYZW_OFFSET $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, - i32:$soffset, imm:$offset, i32:$vindex, 0, - imm:$glc, imm:$slc), + (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, + i16:$offset), + imm:$glc, imm:$slc), (BUFFER_STORE_FORMAT_XYZW_IDXEN $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, - i32:$soffset, imm:$offset, 0, i32:$voffset, + (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, + i16:$offset, + i32:$voffset), imm:$glc, imm:$slc), (BUFFER_STORE_FORMAT_XYZW_OFFEN $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0) >; def : Pat< - (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$soffset, - imm:$offset, i32:$vindex, i32:$voffset, + (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, + i16:$offset, + i32:$voffset), imm:$glc, imm:$slc), (BUFFER_STORE_FORMAT_XYZW_BOTHEN $vdata, |