diff options
| author | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-07 17:09:18 +0000 |
|---|---|---|
| committer | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-07 17:09:18 +0000 |
| commit | 89196642f72cc3325963918c0426ce128c414104 (patch) | |
| tree | 1c0fb19b4dcb115932854c6cfc86768b38a5e136 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | c0e768df9050133466695f3a8f3bf0b3bb343987 (diff) | |
| download | bcm5719-llvm-89196642f72cc3325963918c0426ce128c414104.tar.gz bcm5719-llvm-89196642f72cc3325963918c0426ce128c414104.zip | |
[AMDGPU] Increased vector length for global/constant loads.
Summary: GCN ISA supports instructions that can read 16 consecutive dwords from memory through the scalar data cache;
loadstoreVectorizer should take advantage of the wider vector length and pack 16/8 elements of dwords/quadwords.
Author: FarhanaAleen
Reviewed By: rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D44179
llvm-svn: 326910
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 54aef36333d..68a45cb8817 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3454,6 +3454,10 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } +static bool isDwordAligned(unsigned Alignment) { + return Alignment % 4 == 0; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// @@ -5353,9 +5357,10 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); + unsigned Alignment = Load->getAlignment(); unsigned AS = Load->getAddressSpace(); if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - AS, Load->getAlignment())) { + AS, Alignment)) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, DL); @@ -5383,7 +5388,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() && - !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) + !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) && + isDwordAligned(Alignment)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private |

