author    Alina Sbirlea <asbirlea@google.com>  2016-07-11 20:46:17 +0000
committer Alina Sbirlea <asbirlea@google.com>  2016-07-11 20:46:17 +0000
commit    327955e057a7bc7ae68ad5baac0ba818dc5f0144 (patch)
tree      27d199a9e11dd463f2d8df3d20725d3ef9084e12 /llvm
parent    cfbac5f3612544f8acd63cba72dd44484a393cd4 (diff)
Add TLI.allowsMisalignedMemoryAccesses to LoadStoreVectorizer
Summary:
Extend TTI to access TLI.allowsMisalignedMemoryAccesses(). Check condition
when vectorizing load and store chains. Add additional parameters:
AddressSpace, Alignment, Fast.

Reviewers: llvm-commits, jlebar

Subscribers: arsenm, mzolotukhin

Differential Revision: http://reviews.llvm.org/D21935

llvm-svn: 275100
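In practice the new hook lets the vectorizer ask the target, up front, whether a wide but under-aligned access is both legal and fast. A minimal caller sketch, mirroring the accessIsMisaligned helper added in this patch (SzInBytes, AS, and Alignment assumed in scope):

    bool Fast = false;
    // Query the target: is a (SzInBytes * 8)-bit access at this alignment
    // legal in this address space, and if so, is it also fast?
    bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AS,
                                                     Alignment, &Fast);
    if (Allows && Fast) {
      // The target handles the misaligned access efficiently; the chain
      // can be vectorized without first fixing up the alignment.
    }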
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h                | 14
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h            |  5
-rw-r--r--  llvm/include/llvm/CodeGen/BasicTTIImpl.h                        |  5
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp                       |  8
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp           | 58
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll | 19
6 files changed, 71 insertions(+), 38 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 50c8e8aaec2..7d11d4df638 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -388,6 +388,11 @@ public:
/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;
+ /// \brief Determine if the target supports unaligned memory accesses.
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+ unsigned Alignment = 1,
+ bool *Fast = nullptr) const;
+
/// \brief Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
@@ -653,6 +658,10 @@ public:
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+ virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ unsigned AddressSpace,
+ unsigned Alignment,
+ bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
@@ -820,6 +829,11 @@ public:
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ unsigned Alignment, bool *Fast) override {
+ return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ Alignment, Fast);
+ }
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
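The header changes above thread the hook through TTI's usual type-erasure layering: a public forwarding method, a pure-virtual declaration on the internal Concept, and an override in the templated Model that forwards to the concrete target implementation. A condensed sketch of that pattern, with the surrounding classes elided (the real ones are nested inside TargetTransformInfo):

    struct Concept {                   // abstract interface, one entry per hook
      virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                                  unsigned AddressSpace,
                                                  unsigned Alignment,
                                                  bool *Fast) = 0;
    };
    template <typename T> struct Model final : Concept {
      T Impl;                          // the target's TTI implementation
      bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                          unsigned AddressSpace,
                                          unsigned Alignment,
                                          bool *Fast) override {
        return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
                                                   Alignment, Fast);
      }
    };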
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b9baee10104..52e7de6b222 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,6 +244,11 @@ public:
bool isFPVectorizationPotentiallyUnsafe() { return false; }
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ unsigned AddressSpace,
+ unsigned Alignment,
+ bool *Fast) { return false; }
+
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
return TTI::PSK_Software;
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e4b0aa84323..69951afb623 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -105,6 +105,11 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ unsigned Alignment, bool *Fast) const {
+ MVT M = MVT::getIntegerVT(BitWidth);
+ return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+ }
bool hasBranchDivergence() { return false; }
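BasicTTIImpl is where the IR-level query crosses into codegen: the bit width is packed into an MVT and handed to TargetLowering. A couple of illustrative values (standard MVT behavior, not part of the patch):

    MVT M64 = MVT::getIntegerVT(64);  // MVT::i64 for an 8-byte chain
    MVT M16 = MVT::getIntegerVT(16);  // MVT::i16 for a 2-byte chain
    // A width with no simple machine type (e.g. 24) yields
    // MVT::INVALID_SIMPLE_VALUE_TYPE, which targets generally reject.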
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 51440f5b2df..55b50ae42bc 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -186,6 +186,14 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ unsigned AddressSpace,
+ unsigned Alignment,
+ bool *Fast) const {
+ return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ Alignment, Fast);
+}
+
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 8166361636f..9c581a4603b 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -127,6 +127,10 @@ private:
/// Vectorizes the store instructions in Chain.
bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+
+ /// Check whether this load/store access is misaligned.
+ bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+ unsigned Alignment);
};
class LoadStoreVectorizer : public FunctionPass {
@@ -692,18 +696,16 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
unsigned Alignment = getAlignment(S0);
// If the store is going to be misaligned, don't vectorize it.
- // TODO: Check TLI.allowsMisalignedMemoryAccess
- if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
- if (S0->getPointerAddressSpace() == 0) {
- // If we're storing to an object on the stack, we control its alignment,
- // so we can cheat and change it!
- Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
- AI->setAlignment(TargetBaseAlign);
- Alignment = TargetBaseAlign;
- } else {
- return false;
- }
+ if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+ if (S0->getPointerAddressSpace() != 0)
+ return false;
+
+ // If we're storing to an object on the stack, we control its alignment,
+ // so we can cheat and change it!
+ Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+ AI->setAlignment(TargetBaseAlign);
+ Alignment = TargetBaseAlign;
} else {
return false;
}
@@ -821,18 +823,16 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
unsigned Alignment = getAlignment(L0);
// If the load is going to be misaligned, don't vectorize it.
- // TODO: Check TLI.allowsMisalignedMemoryAccess and remove TargetBaseAlign.
- if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
- if (L0->getPointerAddressSpace() == 0) {
- // If we're loading from an object on the stack, we control its alignment,
- // so we can cheat and change it!
- Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
- AI->setAlignment(TargetBaseAlign);
- Alignment = TargetBaseAlign;
- } else {
- return false;
- }
+ if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+ if (L0->getPointerAddressSpace() != 0)
+ return false;
+
+ // If we're loading from an object on the stack, we control its alignment,
+ // so we can cheat and change it!
+ Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
+ if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+ AI->setAlignment(TargetBaseAlign);
+ Alignment = TargetBaseAlign;
} else {
return false;
}
@@ -915,3 +915,13 @@ bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
NumScalarsVectorized += Chain.size();
return true;
}
+
+bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+ unsigned Alignment) {
+ bool Fast = false;
+ bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
+ Alignment, &Fast);
+ // TODO: Remove TargetBaseAlign
+ return !(Allows && Fast) && (Alignment % SzInBytes) != 0 &&
+ (Alignment % TargetBaseAlign) != 0;
+}
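To make the final predicate concrete, a worked example under assumed values (TargetBaseAlign is 4 in this pass; the access values are hypothetical):

    // SzInBytes = 8, Alignment = 4, target answers Allows = false:
    //   !(Allows && Fast)               -> true
    //   Alignment % SzInBytes       = 4 -> nonzero, potentially misaligned
    //   Alignment % TargetBaseAlign = 0 -> the conjunction fails:
    // accessIsMisaligned returns false; the chain vectorizes as-is.
    // With Alignment = 2 instead, all three terms hold, the function
    // returns true, and the callers above fall back to the alloca
    // realignment trick.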
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
index 961ab1d1fa3..03265efe284 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -19,8 +19,7 @@ define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
}
; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: store <2 x i8>
define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
@@ -50,8 +49,7 @@ define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
}
; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align
-; CHECK: store i16
-; CHECK: store i16
+; CHECK: store <2 x i16>
define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
@@ -61,8 +59,7 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)*
}
; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align
-; CHECK: store half
-; CHECK: store half
+; CHECK: store <2 x half>
define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
@@ -432,14 +429,8 @@ define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 ad
}
; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: load <4 x i8>
+; CHECK: store <4 x i8>
define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
%out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
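The RUN line for this test sits outside the hunks shown; in a tree of this vintage it would look roughly like the following (illustrative, not part of the diff):

    ; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s

Because the AMDGPU target now reports these element-aligned sub-dword accesses as allowed and fast, the CHECK lines flip from per-scalar loads and stores to single vector operations.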