diff options
author | Momchil Velikov <momchil.velikov@arm.com> | 2019-11-06 10:58:43 +0000 |
---|---|---|
committer | Momchil Velikov <momchil.velikov@arm.com> | 2019-11-06 12:46:50 +0000 |
commit | d91ea7fc6fd0cdd530e63c24db1d45253e1fdfdb (patch) | |
tree | 2161f6d5a5915e9489e2eb6354352a58601e96f5 | |
parent | 63f08a59c8942e99d71cfef21cfc050b6f3d6d52 (diff) | |
download | bcm5719-llvm-d91ea7fc6fd0cdd530e63c24db1d45253e1fdfdb.tar.gz bcm5719-llvm-d91ea7fc6fd0cdd530e63c24db1d45253e1fdfdb.zip |
[AArch64] Move the branch relaxation pass after BTI insertion
Summary:
Inserting BTI instructions can push branch destinations out of range.
The branch relaxation pass itself cannot insert indirect branches, since `TargetInstrInfo::insertIndirectBranch` is not implemented for AArch64 (I guess the +/-128 MB direct branch range is more than enough in practice).
Testing this is a bit tricky.
The original test case we have is 155kloc/6.1M. I've generated a test case using this program:
```
int main() {
  // How many distinct h<i>() functions are declared and then called in the
  // generated source. NOTE(review): presumably sized so that the emitted code
  // pushes the conditional branch target out of range -- confirm.
  constexpr int kNumCallees = 8176;

  // Fixed text emitted before the generated declarations and calls: the
  // external declarations, the start of f(), and the switch whose cases fall
  // through into one another.
  const char *const kPrologue = R"src(int test();
void g0(), g1(), g2(), g3(), g4(), e();
void f(int v) {
if ((test() & 2) == 0) {
switch (v) {
case 0:
g0();
case 1:
g1();
case 2:
g2();
case 3:
g3();
}
)src";

  // Fixed text emitted after the generated calls: the else branch and the
  // closing braces of f().
  const char *const kEpilogue = R"src(
} else {
e();
}
}
)src";

  std::cout << kPrologue;

  // First pass: one declaration line per callee.
  for (int idx = 0; idx != kNumCallees; ++idx) {
    std::cout << " void h" << idx << "();\n";
  }

  // Second pass: one call line per callee, in the same order.
  for (int idx = 0; idx != kNumCallees; ++idx) {
    std::cout << " h" << idx << "();\n";
  }

  std::cout << kEpilogue;
}
```
which is still a bit too much to commit as a regression test, IMHO.
Reviewers: t.p.northover, ostannard
Reviewed By: ostannard
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69118
Change-Id: Ide5c922bcde08ff4cf635da5e52365525a997a0a
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/O0-pipeline.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/O3-pipeline.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll | 64 |
4 files changed, 70 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 0ec1e667d69..4101d0abad9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -614,14 +614,15 @@ void AArch64PassConfig::addPreEmitPass() { if (EnableA53Fix835769) addPass(createAArch64A53Fix835769()); + + if (EnableBranchTargets) + addPass(createAArch64BranchTargetsPass()); + // Relax conditional branch instructions if they're otherwise out of // range of their destination. if (BranchRelaxation) addPass(&BranchRelaxationPassID); - if (EnableBranchTargets) - addPass(createAArch64BranchTargetsPass()); - // Identify valid longjmp targets for Windows Control Flow Guard. if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardLongjmpPass()); diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll index 20214fb83f4..0fe214d5d78 100644 --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -58,8 +58,8 @@ ; CHECK-NEXT: AArch64 pseudo instruction expansion pass ; CHECK-NEXT: AArch64 speculation hardening pass ; CHECK-NEXT: Analyze Machine Code For Garbage Collection -; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: AArch64 Branch Targets +; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index de6fc340491..e1ab6782ed0 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -164,8 +164,8 @@ ; CHECK-NEXT: MachinePostDominator Tree Construction ; CHECK-NEXT: Branch Probability Basic Block Placement ; CHECK-NEXT: AArch64 load / store optimization pass -; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: AArch64 Branch Targets +; CHECK-NEXT: Branch 
relaxation pass ; CHECK-NEXT: AArch64 Compress Jump Tables ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll new file mode 100644 index 00000000000..93cbc3b85bb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll @@ -0,0 +1,64 @@ +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-unknown-eabi" + +; Function Attrs: nounwind +define dso_local void @f(i64 %v) local_unnamed_addr #0 { +entry: + %call = tail call i32 bitcast (i32 (...)* @test to i32 ()*)() #0 + %and = and i32 %call, 2 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %if.then, label %if.else +; CHECK: tbz +; CHECK-NEXT: b +if.then: ; preds = %entry + switch i64 %v, label %sw.epilog [ + i64 0, label %sw.bb + i64 1, label %sw.bb1 + i64 2, label %sw.bb2 + i64 3, label %sw.bb3 + ] + +sw.bb: ; preds = %if.then + tail call void bitcast (void (...)* @g0 to void ()*)() #0 + br label %sw.bb1 + +sw.bb1: ; preds = %if.then, %sw.bb + tail call void bitcast (void (...)* @g1 to void ()*)() #0 + br label %sw.bb2 + +sw.bb2: ; preds = %if.then, %sw.bb1 + tail call void bitcast (void (...)* @g2 to void ()*)() #0 + br label %sw.bb3 + +sw.bb3: ; preds = %if.then, %sw.bb2 + tail call void bitcast (void (...)* @g3 to void ()*)() #0 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb3, %if.then + %dummy = tail call i64 @llvm.aarch64.space(i32 32700, i64 %v) + br label %if.end + +if.else: ; preds = %entry + tail call void bitcast (void (...)* @e to void ()*)() #0 + br label %if.end + +if.end: ; preds = %if.else, %sw.epilog + ret void +} + +declare dso_local i32 @test(...) local_unnamed_addr #0 + +declare dso_local void @g0(...) local_unnamed_addr #0 + +declare dso_local void @g1(...) local_unnamed_addr #0 + +declare dso_local void @g2(...) 
local_unnamed_addr #0 + +declare dso_local void @g3(...) local_unnamed_addr #0 + +declare dso_local void @e(...) local_unnamed_addr #0 + +declare dso_local i64 @llvm.aarch64.space(i32, i64) local_unnamed_addr #0 + +attributes #0 = { nounwind "branch-target-enforcement" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" } |