diff options
author | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-09 17:41:39 +0000 |
---|---|---|
committer | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-09 17:41:39 +0000 |
commit | a7cb31123c2526f04e6a587d6ada4084cefe6fb4 (patch) | |
tree | 0f2e93bca2bbcf7eb638c5a1dad49127172cd5f8 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
parent | 91fc4e09499c24e9184853e9a53a953bdea8a356 (diff) | |
download | bcm5719-llvm-a7cb31123c2526f04e6a587d6ada4084cefe6fb4.tar.gz bcm5719-llvm-a7cb31123c2526f04e6a587d6ada4084cefe6fb4.zip |
[AMDGPU] Supported ds_read_b128 generation; Widened vector length for local address-space.
Summary: Starting from GCN 2nd generation, the ISA supports ds_read_b128 in addition to ds_read_b64.
This patch adds the ds_read_b128 instruction pattern and enables generation of this instruction.
In the vectorizer, this patch also widens the vector length so that the vectorizer generates
128-bit loads for the local address space, which get translated to ds_read_b128.
Since the performance benefit is not clear, the compiler generates ds_read_b128 under the -amdgpu-ds128 option.
Author: FarhanaAleen
Reviewed By: rampitec, arsenm
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D44210
llvm-svn: 327153
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 68a45cb8817..8463b22d60a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -94,6 +94,11 @@ static cl::opt<bool> EnableVGPRIndexMode( cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false)); +static cl::opt<bool> EnableDS128( + "amdgpu-ds128", + cl::desc("Use DS_read/write_b128"), + cl::init(false)); + static cl::opt<unsigned> AssumeFrameIndexHighZeroBits( "amdgpu-frame-index-zero-bits", cl::desc("High bits of frame index assumed to be zero"), @@ -5425,14 +5430,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unsupported private_element_size"); } } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { - if (NumElements > 2) - return SplitVectorLoad(Op, DAG); - - if (NumElements == 2) + // Use ds_read_b128 if possible. + if (Subtarget->useDS128(EnableDS128) && Load->getAlignment() >= 16 && + MemVT.getStoreSize() == 16) return SDValue(); - // If properly aligned, if we split we might be able to use ds_read_b64. - return SplitVectorLoad(Op, DAG); + if (NumElements > 2) + return SplitVectorLoad(Op, DAG); } return SDValue(); } |