summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
authorFarhana Aleen <farhana.aleen@gmail.com>2018-03-09 17:41:39 +0000
committerFarhana Aleen <farhana.aleen@gmail.com>2018-03-09 17:41:39 +0000
commita7cb31123c2526f04e6a587d6ada4084cefe6fb4 (patch)
tree0f2e93bca2bbcf7eb638c5a1dad49127172cd5f8 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp
parent91fc4e09499c24e9184853e9a53a953bdea8a356 (diff)
downloadbcm5719-llvm-a7cb31123c2526f04e6a587d6ada4084cefe6fb4.tar.gz
bcm5719-llvm-a7cb31123c2526f04e6a587d6ada4084cefe6fb4.zip
[AMDGPU] Supported ds_read_b128 generation; Widened vector length for local address-space.
Summary: Starting from the 2nd generation of GCN, the ISA supports ds_read_b128 in addition to ds_read_b64. This patch adds the ds_read_b128 instruction pattern and enables generation of this instruction. In the vectorizer, this patch also widens the vector length so that the vectorizer generates 128-bit loads for the local address space, which get translated to ds_read_b128. Since the performance benefit is not clear, the compiler generates ds_read_b128 under -amdgpu-ds128. Author: FarhanaAleen Reviewed By: rampitec, arsenm Subscribers: llvm-commits, AMDGPU Differential Revision: https://reviews.llvm.org/D44210 llvm-svn: 327153
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp16
1 files changed, 10 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 68a45cb8817..8463b22d60a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -94,6 +94,11 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
+static cl::opt<bool> EnableDS128(
+ "amdgpu-ds128",
+ cl::desc("Use DS_read/write_b128"),
+ cl::init(false));
+
static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
"amdgpu-frame-index-zero-bits",
cl::desc("High bits of frame index assumed to be zero"),
@@ -5425,14 +5430,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
- if (NumElements > 2)
- return SplitVectorLoad(Op, DAG);
-
- if (NumElements == 2)
+ // Use ds_read_b128 if possible.
+ if (Subtarget->useDS128(EnableDS128) && Load->getAlignment() >= 16 &&
+ MemVT.getStoreSize() == 16)
return SDValue();
- // If properly aligned, if we split we might be able to use ds_read_b64.
- return SplitVectorLoad(Op, DAG);
+ if (NumElements > 2)
+ return SplitVectorLoad(Op, DAG);
}
return SDValue();
}
OpenPOWER on IntegriCloud