Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp   79
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.td             2
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstructions.td         42
3 files changed, 110 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index fb350db9325..18f15288a9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -127,6 +127,13 @@ private:
                           SDValue &TFE) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &GLC) const;
+  void SelectMUBUFConstant(SDValue Constant,
+                           SDValue &SOffset,
+                           SDValue &ImmOffset) const;
+  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
+                                  SDValue &ImmOffset) const;
+  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
+                                   SDValue &ImmOffset, SDValue &VOffset) const;
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                         bool &Imm) const;
   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
@@ -1112,6 +1119,78 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
 }
 
+void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
+                                             SDValue &SOffset,
+                                             SDValue &ImmOffset) const {
+  SDLoc DL(Constant);
+  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
+  uint32_t Overflow = 0;
+
+  if (Imm >= 4096) {
+    if (Imm <= 4095 + 64) {
+      // Use an SOffset inline constant for 1..64
+      Overflow = Imm - 4095;
+      Imm = 4095;
+    } else {
+      // Try to keep the same value in SOffset for adjacent loads, so that
+      // the corresponding register contents can be re-used.
+      //
+      // Load values with all low-bits set into SOffset, so that a larger
+      // range of values can be covered using s_movk_i32
+      uint32_t High = (Imm + 1) & ~4095;
+      uint32_t Low = (Imm + 1) & 4095;
+      Imm = Low;
+      Overflow = High - 1;
+    }
+  }
+
+  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
+
+  if (Overflow <= 64)
+    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
+  else
+    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
+                      0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
+                                                    SDValue &SOffset,
+                                                    SDValue &ImmOffset) const {
+  SDLoc DL(Offset);
+
+  if (!isa<ConstantSDNode>(Offset))
+    return false;
+
+  SelectMUBUFConstant(Offset, SOffset, ImmOffset);
+
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
+                                                     SDValue &SOffset,
+                                                     SDValue &ImmOffset,
+                                                     SDValue &VOffset) const {
+  SDLoc DL(Offset);
+
+  // Don't generate an unnecessary voffset for constant offsets.
+  if (isa<ConstantSDNode>(Offset))
+    return false;
+
+  if (CurDAG->isBaseWithConstantOffset(Offset)) {
+    SDValue N0 = Offset.getOperand(0);
+    SDValue N1 = Offset.getOperand(1);
+    SelectMUBUFConstant(N1, SOffset, ImmOffset);
+    VOffset = N0;
+  } else {
+    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+    ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+    VOffset = Offset;
+  }
+
+  return true;
+}
+
 ///
 /// \param EncodedOffset This is the immediate value that will be encoded
 ///        directly into the instruction. On SI/CI the \p EncodedOffset
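The interesting piece above is SelectMUBUFConstant: a 32-bit constant buffer offset is split between the MUBUF instruction's 12-bit immediate offset field (0..4095) and the SOffset operand, preferring an SOffset inline constant when the overflow is at most 64 and otherwise loading an all-low-bits value so that adjacent offsets can reuse the same SOffset register. The snippet below is a minimal standalone sketch of that splitting arithmetic on plain integers; the helper name splitMUBUFConstantOffset and the SplitOffset struct are illustrative only, not LLVM API.

// Standalone sketch (not LLVM code): the splitting arithmetic of
// SelectMUBUFConstant, restated on plain integers.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <initializer_list>

struct SplitOffset {
  uint32_t ImmOffset; // 12-bit offset field of the MUBUF instruction (0..4095)
  uint32_t SOffset;   // value that ends up in the SOffset operand
};

static SplitOffset splitMUBUFConstantOffset(uint32_t Imm) {
  uint32_t Overflow = 0;
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Small overflow: an SOffset inline constant (1..64) covers it.
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Put all low bits into SOffset so nearby offsets share one SOffset
      // value (which s_movk_i32 can materialize).
      uint32_t High = (Imm + 1) & ~4095u;
      uint32_t Low = (Imm + 1) & 4095u;
      Imm = Low;
      Overflow = High - 1;
    }
  }
  return {Imm, Overflow};
}

int main() {
  // ImmOffset + SOffset always reassembles the original constant.
  for (uint32_t Off : {0u, 4095u, 4100u, 5000u, 10000u}) {
    SplitOffset S = splitMUBUFConstantOffset(Off);
    assert(S.ImmOffset <= 4095 && S.ImmOffset + S.SOffset == Off);
    std::printf("offset %5u -> imm %4u, soffset %u\n",
                (unsigned)Off, (unsigned)S.ImmOffset, (unsigned)S.SOffset);
  }
  // e.g. 4100 -> imm 4095, soffset 5 (inline constant);
  //      10000 -> imm 1809, soffset 8191 (needs s_mov_b32/s_movk_i32).
  return 0;
}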
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2a1ce1267b4..cdaa662dac2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -722,6 +722,8 @@ def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
 def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
+def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
 
 def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
 def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4d9c942dbd6..902cb8bcb6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2107,28 +2107,38 @@ def : Pat <
 // buffer_load/store_format patterns
 //===----------------------------------------------------------------------===//
 def : Pat<
-  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, 0,
+  (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+                                 (MUBUFIntrinsicOffset i32:$soffset,
+                                                       i16:$offset),
                                  imm:$glc, imm:$slc),
   (BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset),
                                   (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, 0,
+  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+                                 (MUBUFIntrinsicOffset i32:$soffset,
+                                                       i16:$offset),
                                  imm:$glc, imm:$slc),
   (BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset),
                                  (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, i32:$voffset,
+  (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+                                 (MUBUFIntrinsicVOffset i32:$soffset,
+                                                        i16:$offset,
+                                                        i32:$voffset),
                                  imm:$glc, imm:$slc),
   (BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset),
                                  (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, i32:$voffset,
+  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+                                 (MUBUFIntrinsicVOffset i32:$soffset,
+                                                        i16:$offset,
+                                                        i32:$voffset),
                                  imm:$glc, imm:$slc),
   (BUFFER_LOAD_FORMAT_XYZW_BOTHEN
     (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -2137,32 +2147,38 @@ def : Pat<
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
-                                   i32:$soffset, imm:$offset, 0, 0,
+  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+                                  (MUBUFIntrinsicOffset i32:$soffset,
+                                                        i16:$offset),
                                   imm:$glc, imm:$slc),
   (BUFFER_STORE_FORMAT_XYZW_OFFSET $vdata, $rsrc, $soffset, (as_i16imm $offset),
                                    (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
-                                   i32:$soffset, imm:$offset, i32:$vindex, 0,
-                                   imm:$glc, imm:$slc),
+  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+                                  (MUBUFIntrinsicOffset i32:$soffset,
+                                                        i16:$offset),
+                                  imm:$glc, imm:$slc),
   (BUFFER_STORE_FORMAT_XYZW_IDXEN $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset),
                                   (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
-                                   i32:$soffset, imm:$offset, 0, i32:$voffset,
+  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+                                  (MUBUFIntrinsicVOffset i32:$soffset,
+                                                         i16:$offset,
+                                                         i32:$voffset),
                                   imm:$glc, imm:$slc),
   (BUFFER_STORE_FORMAT_XYZW_OFFEN $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset),
                                   (as_i1imm $glc), (as_i1imm $slc), 0)
 >;
 
 def : Pat<
-  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$soffset,
-                                   imm:$offset, i32:$vindex, i32:$voffset,
+  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+                                  (MUBUFIntrinsicVOffset i32:$soffset,
+                                                         i16:$offset,
+                                                         i32:$voffset),
                                   imm:$glc, imm:$slc),
   (BUFFER_STORE_FORMAT_XYZW_BOTHEN $vdata,
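On the TableGen side, each intrinsic gets four patterns: the offset operand is wrapped in the new MUBUFIntrinsicOffset ComplexPattern (purely constant offset) or MUBUFIntrinsicVOffset (variable offset, which also produces a voffset VGPR), and the presence of a vindex and/or voffset selects the OFFSET, IDXEN, OFFEN, or BOTHEN instruction form. The sketch below spells out that mapping; it is illustrative only and not LLVM code.

// Illustrative only: how the eight patterns above divide up the MUBUF
// addressing variants.  A constant offset matches MUBUFIntrinsicOffset
// (no voffset VGPR); a variable offset matches MUBUFIntrinsicVOffset.
#include <cstdio>

static const char *mubufVariant(bool HasVIndex, bool HasVOffset) {
  if (HasVIndex && HasVOffset)
    return "BOTHEN"; // vindex and voffset packed into a 64-bit VGPR pair
  if (HasVIndex)
    return "IDXEN";
  if (HasVOffset)
    return "OFFEN";
  return "OFFSET";   // SOffset plus the 12-bit immediate only
}

int main() {
  std::printf("%s %s %s %s\n",
              mubufVariant(false, false), mubufVariant(true, false),
              mubufVariant(false, true), mubufVariant(true, true));
  // OFFSET IDXEN OFFEN BOTHEN
  return 0;
}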