Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 79
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td      | 42
3 files changed, 110 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index fb350db9325..18f15288a9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -127,6 +127,13 @@ private:
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ void SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
@@ -1112,6 +1119,78 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Constant);
+ uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
+ uint32_t Overflow = 0;
+
+ if (Imm >= 4096) {
+ if (Imm <= 4095 + 64) {
+ // Use an SOffset inline constant for 1..64
+ Overflow = Imm - 4095;
+ Imm = 4095;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits set into SOffset, so that a larger
+ // range of values can be covered using s_movk_i32
+ uint32_t High = (Imm + 1) & ~4095;
+ uint32_t Low = (Imm + 1) & 4095;
+ Imm = Low;
+ Overflow = High - 1;
+ }
+ }
+
+ ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
+
+ if (Overflow <= 64)
+ SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
+ else
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
+ 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Offset);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return false;
+
+ SelectMUBUFConstant(Offset, SOffset, ImmOffset);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset,
+ SDValue &VOffset) const {
+ SDLoc DL(Offset);
+
+ // Don't generate an unnecessary voffset for constant offsets.
+ if (isa<ConstantSDNode>(Offset))
+ return false;
+
+ if (CurDAG->isBaseWithConstantOffset(Offset)) {
+ SDValue N0 = Offset.getOperand(0);
+ SDValue N1 = Offset.getOperand(1);
+ SelectMUBUFConstant(N1, SOffset, ImmOffset);
+ VOffset = N0;
+ } else {
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ VOffset = Offset;
+ }
+
+ return true;
+}
+
///
/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset
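Note: the new SelectMUBUFConstant above splits a constant buffer offset into the 12-bit MUBUF immediate field (0..4095) and a remainder carried in SOffset. The following is a minimal standalone sketch of just that arithmetic, outside of SelectionDAG; the function and struct names are illustrative only, and it does not model the S_MOV_B32 the real selector emits when the remainder exceeds an inline constant.

#include <cassert>
#include <cstdint>

// Illustrative only: mirrors the splitting arithmetic in SelectMUBUFConstant.
struct SplitOffset {
  uint32_t ImmOffset; // value for the instruction's 12-bit offset field
  uint32_t SOffset;   // value for the soffset operand
};

static SplitOffset splitMUBUFOffset(uint32_t Imm) {
  uint32_t Overflow = 0;
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Small overflow: keep the immediate at its maximum and move the
      // remaining 1..64 into SOffset, where it fits an inline constant.
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Put all low bits into the immediate so adjacent loads can share the
      // same SOffset value (reusable register, s_movk_i32-friendly).
      uint32_t High = (Imm + 1) & ~4095u;
      uint32_t Low = (Imm + 1) & 4095u;
      Imm = Low;
      Overflow = High - 1;
    }
  }
  return {Imm, Overflow};
}

int main() {
  // Worked example: 5000 splits into 905 (immediate) + 4095 (SOffset).
  SplitOffset S = splitMUBUFOffset(5000);
  assert(S.ImmOffset + S.SOffset == 5000 && S.ImmOffset < 4096);
  return 0;
}

In both branches ImmOffset + SOffset equals the original constant, so the split never changes the effective address.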
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2a1ce1267b4..cdaa662dac2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -722,6 +722,8 @@ def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
+def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4d9c942dbd6..902cb8bcb6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2107,28 +2107,38 @@ def : Pat <
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, 0,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, 0,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, i32:$voffset,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_BOTHEN
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -2137,32 +2147,38 @@ def : Pat<
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, 0, 0,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_OFFSET $vdata, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, i32:$vindex, 0,
- imm:$glc, imm:$slc),
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
+ imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_IDXEN $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, 0, i32:$voffset,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_OFFEN $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$soffset,
- imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_BOTHEN
$vdata,
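The patterns above choose among the four MUBUF addressing variants according to which offsets the intrinsic supplies: a constant-only offset folds through MUBUFIntrinsicOffset into the OFFSET form, a variable offset goes through MUBUFIntrinsicVOffset into the OFFEN form, and a nonzero vindex selects the IDXEN or BOTHEN form. The sketch below is a rough decision table for that choice; the enum and function names are illustrative and not part of the patch or the LLVM API.

#include <cstdio>

// Illustrative only: which BUFFER_*_FORMAT_XYZW variant the patterns select.
enum class MUBUFVariant { OFFSET, IDXEN, OFFEN, BOTHEN };

static MUBUFVariant pickVariant(bool HasVIndex, bool HasVOffset) {
  if (HasVIndex && HasVOffset)
    return MUBUFVariant::BOTHEN; // vindex + voffset packed into a VReg_64 pair
  if (HasVIndex)
    return MUBUFVariant::IDXEN;  // vindex only
  if (HasVOffset)
    return MUBUFVariant::OFFEN;  // variable voffset only
  return MUBUFVariant::OFFSET;   // constant offset folded into soffset + imm
}

int main() {
  printf("%d\n", static_cast<int>(pickVariant(false, true))); // OFFEN
  return 0;
}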