summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2017-04-10 18:34:37 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2017-04-10 18:34:37 +0000
commit1468d3e04ee4f93e7951e902afbc74e585243f3f (patch)
treeb284aae10829b331611f33dd37f565ab2936b716 /llvm/lib/Target/ARM/ARMISelLowering.cpp
parent0d830ff7bf86e68d53931259af81dfa70bb9935b (diff)
downloadbcm5719-llvm-1468d3e04ee4f93e7951e902afbc74e585243f3f.tar.gz
bcm5719-llvm-1468d3e04ee4f93e7951e902afbc74e585243f3f.zip
[ARM/AArch64] Ensure valid vector element types for interleaved accesses
This patch refactors and strengthens the type checks performed for interleaved accesses. The primary functional change is to ensure that the interleaved accesses have valid element types. The added test cases previously failed because the element type is f128. Differential Revision: https://reviews.llvm.org/D31817 llvm-svn: 299864
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp56
1 files changed, 32 insertions, 24 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 93eed9160a0..e697c8ca533 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13593,11 +13593,37 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
-static unsigned getNumInterleavedAccesses(VectorType *VecTy,
- const DataLayout &DL) {
+unsigned
+ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
+bool ARMTargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the vector doesn't have f16 elements. Even though we could do an
+ // i16 vldN, we can't hold the f16 vectors and will end up converting via
+ // f32.
+ if (VecTy->getElementType()->isHalfTy())
+ return false;
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -13622,20 +13648,11 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vldN doesn't support i64/f64 elements). We can
+ // Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize % 128 != 0) ||
- EltIs64Bits)
- return false;
-
- // Skip if the vector has f16 elements: even though we could do an i16 vldN,
- // we can't hold the f16 vectors and will end up converting via f32.
- if (EltTy->isHalfTy())
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
@@ -13765,20 +13782,11 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vldN doesn't support i64/f64 elements). We can
+ // Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize % 128 != 0) ||
- EltIs64Bits)
- return false;
-
- // Skip if the vector has f16 elements: even though we could do an i16 vldN,
- // we can't hold the f16 vectors and will end up converting via f32.
- if (EltTy->isHalfTy())
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
OpenPOWER on IntegriCloud