summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp34
-rw-r--r--llvm/test/CodeGen/ARM/ldrd.ll52
-rw-r--r--llvm/test/CodeGen/ARM/swift-vldm.ll1
-rw-r--r--llvm/test/CodeGen/Thumb2/thumb2-ldm.ll55
4 files changed, 111 insertions, 31 deletions
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5ee6641720e..cc49f9d549b 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,6 +60,15 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+/// This switch disables formation of double/multi instructions that could
+/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
+/// disabled. This can be used to create libraries that are robust even when
+/// users provoke undefined behaviour by supplying misaligned pointers.
+/// \see mayCombineMisaligned()
+static cl::opt<bool>
+AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
+ cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
+
namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}
@@ -916,6 +925,24 @@ static bool isValidLSDoubleOffset(int Offset) {
return (Value % 4) == 0 && Value < 1024;
}
+/// Return true for loads/stores that can be combined to a double/multi
+/// operation without increasing the requirements for alignment.
+static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
+ const MachineInstr &MI) {
+ // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
+ // difference.
+ unsigned Opcode = MI.getOpcode();
+ if (!isi32Load(Opcode) && !isi32Store(Opcode))
+ return true;
+
+ // Stack pointer alignment is out of the programmers control so we can trust
+ // SP-relative loads/stores.
+ if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
+ STI.getFrameLowering()->getTransientStackAlignment() >= 4)
+ return true;
+ return false;
+}
+
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
@@ -954,6 +981,10 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;
+ // Should we be conservative?
+ if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@@ -1926,6 +1957,9 @@ INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (AssumeMisalignedLoadStores)
+ return false;
+
TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
diff --git a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll
index b2596346bfa..dd97fbfd640 100644
--- a/llvm/test/CodeGen/ARM/ldrd.ll
+++ b/llvm/test/CodeGen/ARM/ldrd.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs | FileCheck %s -check-prefix=A8 -check-prefix=CHECK
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs | FileCheck %s -check-prefix=A8 -check-prefix=CHECK -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 -check-prefix=CHECK -check-prefix=NORMAL
; rdar://6949835
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -check-prefix=CHECK -check-prefix=NORMAL
+
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-assume-misaligned-load-store | FileCheck %s -check-prefix=CHECK -check-prefix=CONSERVATIVE
; Magic ARM pair hints works best with linearscan / fast.
@@ -15,12 +17,13 @@ declare void @use_i64(i64 %v)
define void @test_ldrd(i64 %a) nounwind readonly {
; CHECK-LABEL: test_ldrd:
-; CHECK: bl{{x?}} _get_ptr
+; NORMAL: bl{{x?}} _get_ptr
; A8: ldrd r0, r1, [r0]
; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
; register when interrupted or faulted.
; M3-NOT: ldrd r[[REGNUM:[0-9]+]], {{r[0-9]+}}, [r[[REGNUM]]]
-; CHECK: bl{{x?}} _use_i64
+; CONSERVATIVE-NOT: ldrd
+; NORMAL: bl{{x?}} _use_i64
%ptr = call i64* @get_ptr()
%v = load i64, i64* %ptr, align 8
call void @use_i64(i64 %v)
@@ -39,11 +42,10 @@ define void @test_ldrd(i64 %a) nounwind readonly {
; evict another live range or use callee saved regs. Sorry if the test
; is sensitive to Regalloc changes, but it is an interesting case.
;
-; BASIC: @f
+; CHECK-LABEL: f:
; BASIC: %bb
; BASIC: ldrd
; BASIC: str
-; GREEDY: @f
; GREEDY: %bb
; GREEDY: ldrd
; GREEDY: str
@@ -76,14 +78,15 @@ return: ; preds = %bb, %entry
@TestVar = external global %struct.Test
+; CHECK-LABEL: Func1:
define void @Func1() nounwind ssp {
-; CHECK: @Func1
entry:
; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}}
; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
; A8-NEXT: add [[FIELD1]], [[FIELD2]]
; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
+; CONSERVATIVE-NOT: ldrd
%orig_blocks = alloca [256 x i16], align 2
%0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start(i64 512, i8* %0) nounwind
%tmp1 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 1), align 4
@@ -97,8 +100,9 @@ entry:
declare void @extfunc(i32, i32, i32, i32)
; CHECK-LABEL: Func2:
+; CONSERVATIVE-NOT: ldrd
; A8: ldrd
-; A8: blx
+; CHECK: bl{{x?}} _extfunc
; A8: pop
define void @Func2(i32* %p) {
entry:
@@ -116,12 +120,14 @@ entry:
; M3: strd r1, r0, [sp, #-8]!
; BASIC: strd r1, r0, [sp, #-8]!
; GREEDY: strd r0, r1, [sp, #-8]!
-; CHECK: @ InlineAsm Start
-; CHECK: @ InlineAsm End
+; CONSERVATIVE: strd r0, r1, [sp, #-8]!
+; NORMAL: @ InlineAsm Start
+; NORMAL: @ InlineAsm End
; A8: ldrd r2, r1, [sp]
; M3: ldrd r2, r1, [sp]
; BASIC: ldrd r2, r1, [sp]
; GREEDY: ldrd r1, r2, [sp]
+; CONSERVATIVE: ldrd r1, r2, [sp]
; CHECK: bl{{x?}} _extfunc
define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
; force %v0 and %v1 to be spilled
@@ -134,8 +140,9 @@ define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
declare void @extfunc2(i32*, i32, i32)
; CHECK-LABEL: ldrd_postupdate_dec:
-; CHECK: ldrd r1, r2, [r0], #-8
-; CHECK-NEXT: bl{{x?}} _extfunc
+; NORMAL: ldrd r1, r2, [r0], #-8
+; CONSERVATIVE-NOT: ldrd
+; CHECK: bl{{x?}} _extfunc
define void @ldrd_postupdate_dec(i32* %p0) {
%p0.1 = getelementptr i32, i32* %p0, i32 1
%v0 = load i32, i32* %p0
@@ -146,8 +153,9 @@ define void @ldrd_postupdate_dec(i32* %p0) {
}
; CHECK-LABEL: ldrd_postupdate_inc:
-; CHECK: ldrd r1, r2, [r0], #8
-; CHECK-NEXT: bl{{x?}} _extfunc
+; NORMAL: ldrd r1, r2, [r0], #8
+; CONSERVATIVE-NOT: ldrd
+; CHECK: bl{{x?}} _extfunc
define void @ldrd_postupdate_inc(i32* %p0) {
%p0.1 = getelementptr i32, i32* %p0, i32 1
%v0 = load i32, i32* %p0
@@ -158,8 +166,9 @@ define void @ldrd_postupdate_inc(i32* %p0) {
}
; CHECK-LABEL: strd_postupdate_dec:
-; CHECK: strd r1, r2, [r0], #-8
-; CHECK-NEXT: bx lr
+; NORMAL: strd r1, r2, [r0], #-8
+; CONSERVATIVE-NOT: strd
+; CHECK: bx lr
define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) {
%p0.1 = getelementptr i32, i32* %p0, i32 1
store i32 %v0, i32* %p0
@@ -169,8 +178,9 @@ define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) {
}
; CHECK-LABEL: strd_postupdate_inc:
-; CHECK: strd r1, r2, [r0], #8
-; CHECK-NEXT: bx lr
+; NORMAL: strd r1, r2, [r0], #8
+; CONSERVATIVE-NOT: strd
+; CHECK: bx lr
define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) {
%p0.1 = getelementptr i32, i32* %p0, i32 1
store i32 %v0, i32* %p0
diff --git a/llvm/test/CodeGen/ARM/swift-vldm.ll b/llvm/test/CodeGen/ARM/swift-vldm.ll
index 9e507279fa0..a53b2413bde 100644
--- a/llvm/test/CodeGen/ARM/swift-vldm.ll
+++ b/llvm/test/CodeGen/ARM/swift-vldm.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mcpu=swift -mtriple=armv7s-apple-ios | FileCheck %s
+; RUN: llc < %s -arm-assume-misaligned-load-store -mcpu=swift -mtriple=armv7s-apple-ios | FileCheck %s
; Check that we avoid producing vldm instructions using d registers that
; begin in the most-significant half of a q register. These require more
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
index 28903aca326..a5b47411124 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,12 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s -check-prefix=ALL -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 -arm-assume-misaligned-load-store | FileCheck %s -check-prefix=ALL -check-prefix=CONSERVATIVE
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
-; CHECK-LABEL: t1:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t1:
+; ALL: push {r7, lr}
; CHECK: ldrd
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
@@ -14,10 +17,12 @@ define i32 @t1() {
}
define i32 @t2() {
-; CHECK-LABEL: t2:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t2:
+; ALL: push {r7, lr}
; CHECK: ldm
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
@@ -26,10 +31,12 @@ define i32 @t2() {
}
define i32 @t3() {
-; CHECK-LABEL: t3:
-; CHECK: push {r7, lr}
+; ALL-LABEL: t3:
+; ALL: push {r7, lr}
; CHECK: ldm
-; CHECK: pop {r7, pc}
+; CONSERVATIVE-NOT: ldrd
+; CONSERVATIVE-NOT: ldm
+; ALL: pop {r7, pc}
%tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
@@ -37,6 +44,34 @@ define i32 @t3() {
ret i32 %tmp6
}
+@g = common global i32* null
+
+define void @t4(i32 %a0, i32 %a1, i32 %a2) {
+; ALL-LABEL: t4:
+; ALL: stm.w sp, {r0, r1, r2}
+; ALL: blx _ext
+; ALL: ldm.w sp, {r0, r1, r2}
+; ALL: blx _f2
+ %arr = alloca [4 x i32], align 4
+ %p0 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 0
+ %p1 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 1
+ %p2 = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 2
+ store i32* %p0, i32** @g, align 8
+
+ store i32 %a0, i32* %p0, align 4
+ store i32 %a1, i32* %p1, align 4
+ store i32 %a2, i32* %p2, align 4
+ call void @ext()
+
+ %v0 = load i32, i32* %p0, align 4
+ %v1 = load i32, i32* %p1, align 4
+ %v2 = load i32, i32* %p2, align 4
+ call i32 @f2(i32 %v0, i32 %v1, i32 %v2)
+ ret void
+}
+
declare i32 @f1(i32, i32)
declare i32 @f2(i32, i32, i32)
+
+declare void @ext()
OpenPOWER on IntegriCloud