summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2016-03-18 16:24:20 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2016-03-18 16:24:20 +0000
commit3003ba00a3260bdee71dd802bcfa970c3580e6bb (patch)
tree2ee90f91725b6c097dbf0def5e621966580566e2 /llvm/lib/Target
parenta74cd526e9b273f1ca99793e62dfb6d1396bf6b3 (diff)
downloadbcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.tar.gz
bcm5719-llvm-3003ba00a3260bdee71dd802bcfa970c3580e6bb.zip
AMDGPU: use ComplexPattern for offsets in llvm.amdgcn.buffer.load/store.format
Summary: We cannot easily deduce that an offset is in an SGPR, but the Mesa frontend cannot easily make use of an explicit soffset parameter either. Furthermore, it is likely that in the future, LLVM will be in a better position than the frontend to choose an SGPR offset if possible. Since there aren't any frontend uses of these intrinsics in upstream repositories yet, I would like to take this opportunity to change the intrinsic signatures to a single offset parameter, which is then selected to immediate offsets or voffsets using a ComplexPattern. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18218 llvm-svn: 263790
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp79
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td42
3 files changed, 110 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index fb350db9325..18f15288a9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -127,6 +127,13 @@ private:
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ void SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
@@ -1112,6 +1119,78 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Constant);
+ uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
+ uint32_t Overflow = 0;
+
+ if (Imm >= 4096) {
+ if (Imm <= 4095 + 64) {
+ // Use an SOffset inline constant for 1..64
+ Overflow = Imm - 4095;
+ Imm = 4095;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits set into SOffset, so that a larger
+ // range of values can be covered using s_movk_i32
+ uint32_t High = (Imm + 1) & ~4095;
+ uint32_t Low = (Imm + 1) & 4095;
+ Imm = Low;
+ Overflow = High - 1;
+ }
+ }
+
+ ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
+
+ if (Overflow <= 64)
+ SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
+ else
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
+ 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Offset);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return false;
+
+ SelectMUBUFConstant(Offset, SOffset, ImmOffset);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset,
+ SDValue &VOffset) const {
+ SDLoc DL(Offset);
+
+ // Don't generate an unnecessary voffset for constant offsets.
+ if (isa<ConstantSDNode>(Offset))
+ return false;
+
+ if (CurDAG->isBaseWithConstantOffset(Offset)) {
+ SDValue N0 = Offset.getOperand(0);
+ SDValue N1 = Offset.getOperand(1);
+ SelectMUBUFConstant(N1, SOffset, ImmOffset);
+ VOffset = N0;
+ } else {
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ VOffset = Offset;
+ }
+
+ return true;
+}
+
///
/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2a1ce1267b4..cdaa662dac2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -722,6 +722,8 @@ def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
// MUBUF / SMRD addressing-mode ComplexPatterns; the string names the
// AMDGPUDAGToDAGISel::Select* member function that performs the match.
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
// Offset operand of llvm.amdgcn.buffer.load/store.format: split into
// (soffset, imm offset) or, for variable offsets, (soffset, imm offset,
// voffset).
def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4d9c942dbd6..902cb8bcb6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2107,28 +2107,38 @@ def : Pat <
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
// No vindex and no variable voffset: plain OFFSET addressing.
def : Pat<
  (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
                                 (MUBUFIntrinsicOffset i32:$soffset,
                                                       i16:$offset),
                                 imm:$glc, imm:$slc),
  (BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset),
                                  (as_i1imm $glc), (as_i1imm $slc), 0)
>;

// vindex only: IDXEN addressing.
def : Pat<
  (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
                                 (MUBUFIntrinsicOffset i32:$soffset,
                                                       i16:$offset),
                                 imm:$glc, imm:$slc),
  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset),
                                 (as_i1imm $glc), (as_i1imm $slc), 0)
>;

// Variable voffset, no vindex: OFFEN addressing.
def : Pat<
  (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
                                 (MUBUFIntrinsicVOffset i32:$soffset,
                                                        i16:$offset,
                                                        i32:$voffset),
                                 imm:$glc, imm:$slc),
  (BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset),
                                 (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_BOTHEN
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -2137,32 +2147,38 @@ def : Pat<
>;
// No vindex and no variable voffset: plain OFFSET addressing.
def : Pat<
  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
                                  (MUBUFIntrinsicOffset i32:$soffset,
                                                        i16:$offset),
                                  imm:$glc, imm:$slc),
  (BUFFER_STORE_FORMAT_XYZW_OFFSET $vdata, $rsrc, $soffset, (as_i16imm $offset),
                                   (as_i1imm $glc), (as_i1imm $slc), 0)
>;

// vindex only: IDXEN addressing.
def : Pat<
  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
                                  (MUBUFIntrinsicOffset i32:$soffset,
                                                        i16:$offset),
                                  imm:$glc, imm:$slc),
  (BUFFER_STORE_FORMAT_XYZW_IDXEN $vdata, $vindex, $rsrc, $soffset,
                                  (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;

// Variable voffset, no vindex: OFFEN addressing.
def : Pat<
  (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
                                  (MUBUFIntrinsicVOffset i32:$soffset,
                                                         i16:$offset,
                                                         i32:$voffset),
                                  imm:$glc, imm:$slc),
  (BUFFER_STORE_FORMAT_XYZW_OFFEN $vdata, $voffset, $rsrc, $soffset,
                                  (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$soffset,
- imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_BOTHEN
$vdata,
OpenPOWER on IntegriCloud