diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-16 22:43:16 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2013-09-16 22:43:16 +0000 |
| commit | 899f7d2b00a59f284fdb8915b63813705ce66148 (patch) | |
| tree | 45bbbd35a103203498bfbd2fd58ad14d262a32a0 | |
| parent | e41f37d99db1e341b3cf24ed49f21b58d63c15c4 (diff) | |
| download | bcm5719-llvm-899f7d2b00a59f284fdb8915b63813705ce66148.tar.gz bcm5719-llvm-899f7d2b00a59f284fdb8915b63813705ce66148.zip | |
MemCpyOptimizer: Use max legal int size instead of pointer size
If there are no legal integers, assume 1 byte.
This makes more sense than using the pointer size as
a guess for the maximum GPR width.
It is conceivable to want to use some 64-bit pointers
on a target where 64-bit integers aren't legal.
llvm-svn: 190817
| -rw-r--r-- | llvm/include/llvm/IR/DataLayout.h | 11 | ||||
| -rw-r--r-- | llvm/lib/IR/DataLayout.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 13 |
3 files changed, 26 insertions, 5 deletions
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 25f7569e246..10630a26200 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -369,6 +369,17 @@ public: /// least as big as Width bits. Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const; + /// getLargestLegalIntType - Return the largest legal integer type, or null if + /// none are set. + Type *getLargestLegalIntType(LLVMContext &C) const { + unsigned LargestSize = getLargestLegalIntTypeSize(); + return (LargestSize == 0) ? 0 : Type::getIntNTy(C, LargestSize); + } + + /// getLargestLegalIntTypeSize - Return the size in bits of the largest legal + /// integer type, or 0 if none are set. + unsigned getLargestLegalIntTypeSize() const; + /// getIndexedOffset - return the offset from the beginning of the type for /// the specified indices. This is used to implement getelementptr. uint64_t getIndexedOffset(Type *Ty, ArrayRef<Value *> Indices) const; diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d786d33f9ca..6bdc09eaee4 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -629,6 +629,13 @@ Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const return 0; } +unsigned DataLayout::getLargestLegalIntTypeSize() const { + unsigned MaxWidth = 0; + for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i) + MaxWidth = std::max<unsigned>(MaxWidth, LegalIntWidths[i]); + return MaxWidth; +} + uint64_t DataLayout::getIndexedOffset(Type *ptrTy, ArrayRef<Value *> Indices) const { Type *Ty = ptrTy; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 8f61ffd84bf..9912d3dafed 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -170,14 +170,17 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // pessimize the llvm 
optimizer. // // Since we don't have perfect knowledge here, make some assumptions: assume - // the maximum GPR width is the same size as the pointer size and assume that - // this width can be stored. If so, check to see whether we will end up - // actually reducing the number of stores used. + // the maximum GPR width is the same size as the largest legal integer + // size. If so, check to see whether we will end up actually reducing the + // number of stores used. unsigned Bytes = unsigned(End-Start); - unsigned NumPointerStores = Bytes/TD.getPointerSize(); + unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); + if (MaxIntSize == 0) + MaxIntSize = 1; + unsigned NumPointerStores = Bytes / MaxIntSize; // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); + unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 |

