summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/InterleavedAccessPass.cpp
diff options
context:
space:
mode:
author: Eli Friedman <efriedma@quicinc.com> 2019-03-28 20:44:50 +0000
committer: Eli Friedman <efriedma@quicinc.com> 2019-03-28 20:44:50 +0000
commit: 96f295e23bed5b717313f41fb71d81e8f1d49090 (patch)
tree: e95da9d1d760160bbbcfc0947f6e1376de839ef6 /llvm/lib/CodeGen/InterleavedAccessPass.cpp
parent: c87869ebea000dd6483de7c7451cb36c1d36f866 (diff)
download: bcm5719-llvm-96f295e23bed5b717313f41fb71d81e8f1d49090.tar.gz
bcm5719-llvm-96f295e23bed5b717313f41fb71d81e8f1d49090.zip
[InterleavedAccessPass] Don't increase the number of bytes loaded.

Even if the interleaving transform would otherwise be legal, we shouldn't introduce an interleaved load that is wider than the original load: it might have undefined behavior.

It might be possible to perform some sort of mask-narrowing transform in some cases (using a narrower interleaved load, then extending the results using shufflevectors). But I haven't tried to implement that, at least for now.

Fixes https://bugs.llvm.org/show_bug.cgi?id=41245 .

Differential Revision: https://reviews.llvm.org/D59954

llvm-svn: 357212
Diffstat (limited to 'llvm/lib/CodeGen/InterleavedAccessPass.cpp')
-rw-r--r-- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 12
1 files changed, 9 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 2f8012aeeef..14bc560a561 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -163,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index, unsigned MaxFactor) {
+ unsigned &Index, unsigned MaxFactor,
+ unsigned NumLoadElements) {
if (Mask.size() < 2)
return false;
// Check potential Factors.
- for (Factor = 2; Factor <= MaxFactor; Factor++)
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ // Make sure we don't produce a load wider than the input load.
+ if (Mask.size() * Factor > NumLoadElements)
+ return false;
if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
+ }
return false;
}
@@ -302,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
+ unsigned NumLoadElements = LI->getType()->getVectorNumElements();
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor))
+ MaxFactor, NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
OpenPOWER on IntegriCloud