summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp36
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp15
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h3
3 files changed, 54 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 61485fc67df..314c73437e0 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1228,6 +1228,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ AllocaInst *AI;
+ if ((Offset2 & (PrefAlign-1)) == 0 &&
+ (AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // TODO: Also align GlobalVariables
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 74e8512851e..fb12cc226af 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
@@ -1163,6 +1164,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index a364933b38a..682f479278e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -368,6 +368,9 @@ namespace llvm {
return true;
}
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
OpenPOWER on IntegriCloud