diff options
| author | Matthias Braun <matze@braunis.de> | 2016-12-03 00:52:56 +0000 |
|---|---|---|
| committer | Matthias Braun <matze@braunis.de> | 2016-12-03 00:52:56 +0000 |
| commit | 1fbb0f6dd9979ae469040b43c9d9262f4430611e (patch) | |
| tree | 2a3538b098329fd9ce434890447072b0aac41d59 /llvm/test/CodeGen | |
| parent | 835de1f3ab2df5ce56eab9e5d9021a26fe04b854 (diff) | |
| download | bcm5719-llvm-1fbb0f6dd9979ae469040b43c9d9262f4430611e.tar.gz bcm5719-llvm-1fbb0f6dd9979ae469040b43c9d9262f4430611e.zip | |
AArch64CollectLOH: Rewrite as block-local analysis.
Previously, this pass used up to 5% of compile time in some cases, which
is a bit much for what it does. The pass featured a full-blown
data-flow analysis which, in the default configuration, was restricted to a
single block.
This rewrites the pass under the assumption that we only ever work on a
single block. The analysis is done in a single pass, maintaining a state machine
per general-purpose register to catch LOH patterns.
Differential Revision: https://reviews.llvm.org/D27329
llvm-svn: 288561
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-collect-loh.ll | 17 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/loh.mir | 179 |
4 files changed, 191 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll index 4a3696501fd..727c189721f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. ; diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll index e3df4182ddc..773286ef1d7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s ; Test case for <rdar://problem/15942912>. ; AdrpAddStr cannot be used when the store uses same ; register as address and value. 
Indeed, the related diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll index b697b6eced3..c7ba989d933 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh ; CHECK-ELF-NOT: AdrpAdrp @@ -633,11 +633,14 @@ define void @setL(<1 x i8> %t) { ; a tuple register to appear in the lowering. Thus, the target ; cpu is required to have the problem reproduced. ; CHECK-LABEL: _uninterestingSub +; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]: ; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE -; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] +; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]: +; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] ; The tuple comes from the next instruction. 
; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] ; CHECK: ret +; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]] define void @uninterestingSub(i8* nocapture %row) #0 { %tmp = bitcast i8* %row to <16 x i8>* %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 @@ -664,10 +667,10 @@ entry: if.then.i: ret void if.end.i: -; CHECK: .loh AdrpAdrp Lloh91, Lloh93 -; CHECK: .loh AdrpLdr Lloh91, Lloh92 -; CHECK: .loh AdrpLdrGot Lloh93, Lloh95 -; CHECK: .loh AdrpLdrGot Lloh94, Lloh96 +; CHECK: .loh AdrpLdrGot +; CHECK: .loh AdrpLdrGot +; CHECK: .loh AdrpAdrp +; CHECK: .loh AdrpLdr %mul.i.i.i = fmul double undef, 1.000000e-06 %add.i.i.i = fadd double undef, %mul.i.i.i %sub.i.i = fsub double %add.i.i.i, undef diff --git a/llvm/test/CodeGen/AArch64/loh.mir b/llvm/test/CodeGen/AArch64/loh.mir new file mode 100644 index 00000000000..4809eb5c92a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/loh.mir @@ -0,0 +1,179 @@ +# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s +--- | + define void @func0() { ret void } + + declare void @extfunc() + + @g0 = external global i32 + @g1 = external global i32 + @g2 = external global i32 + @g3 = external global i32 + @g4 = external global i32 + @g5 = external global i32 +... +--- +# Check various LOH variants. Remember that the algorithms walks the basic +# blocks backwards. 
+# CHECK-LABEL: ********** AArch64 Collect LOH ********** +# CHECK-LABEL: Looking in function func0 +name: func0 +body: | + bb.0: + ; CHECK: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3> + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4> + ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g2> + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3> + ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X0<def> = ADRP <ga:@g0> + ; CHECK-NEXT: %X0<def> = ADRP <ga:@g1> + %x0 = ADRP target-flags(aarch64-page) @g0 + %x0 = ADRP target-flags(aarch64-page) @g1 + %x1 = ADRP target-flags(aarch64-page) @g2 + %x1 = ADRP target-flags(aarch64-page) @g3 + %x1 = ADRP target-flags(aarch64-page) @g4 + + bb.1: + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X20<def> = ADRP <ga:@g0> + ; CHECK-NEXT: %X3<def> = ADDXri %X20, <ga:@g0> + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g0> + ; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g0> + %x1 = ADRP target-flags(aarch64-page) @g0 + %x9 = SUBXri %x11, 5, 0 ; should not affect MCLOH formation + %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0 + %x20 = ADRP target-flags(aarch64-page) @g0 + BL @extfunc, csr_aarch64_aapcs ; should not clobber X20 + %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0 + + bb.2: + ; CHECK-NOT: MCLOH_AdrpAdd + %x9 = ADRP target-flags(aarch64-page) @g0 + BL @extfunc, csr_aarch64_aapcs ; clobbers x9 + %x9 = ADDXri %x9, target-flags(aarch64-pageoff) @g0, 0 + + bb.3: + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + + bb.4: + ; Cannot produce a LOH for multiple users + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + + bb.5: + ; CHECK-NEXT: 
Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X5<def> = ADRP <ga:@g2> + ; CHECK-NEXT: %S6<def> = LDRSui %X5, <ga:@g2> + ; CHECK-NEXT: Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X4<def> = ADRP <ga:@g2> + ; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2> + %x4 = ADRP target-flags(aarch64-page) @g2 + %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 + %x5 = ADRP target-flags(aarch64-page) @g2 + %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2 + + bb.6: + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: + ; CHECK-NEXT: %X5<def> = ADRP <ga:@g2> + ; CHECK-NEXT: %X6<def> = LDRXui %X5, <ga:@g2> + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: + ; CHECK-NEXT: %X4<def> = ADRP <ga:@g2> + ; CHECK-NEXT: %X4<def> = LDRXui %X4, <ga:@g2> + %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 + %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 + + bb.7: + ; CHECK-NOT: Adding MCLOH_AdrpLdrGot: + ; This sequence makes no sense and should not produce a LdrGot + %x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5 + %s11 = LDRSui %x4, target-flags(aarch64-pageoff, aarch64-got) @g5 + + bb.8: + ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr: + ; CHECK-NEXT: %X7<def> = ADRP <ga:@g3>[TF=1] + ; CHECK-NEXT: %X8<def> = ADDXri %X7, <ga:@g3> + ; CHECK-NEXT: %D1<def> = LDRDui %X8, 8 + %x7 = ADRP target-flags(aarch64-page) @g3 + %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0 + %d1 = LDRDui %x8, 8 + + bb.9: + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X3<def> = ADRP <ga:@g3> + ; CHECK-NEXT: %X3<def> = ADDXri %X3, <ga:@g3> + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X5<def> = ADRP <ga:@g3> + ; CHECK-NEXT: %X2<def> = ADDXri %X5, <ga:@g3> + ; CHECK-NEXT: Adding MCLOH_AdrpAddStr: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g3> + ; CHECK-NEXT: %X1<def> = ADDXri %X1, <ga:@g3> + ; CHECK-NEXT: STRXui %X2, %X1, 16 + %x1 = ADRP target-flags(aarch64-page) @g3 + %x1 = ADDXri %x1, 
target-flags(aarch64-pageoff) @g3, 0 + STRXui %x2, %x1, 16 + + ; This sequence should just produce an AdrpAdd (not AdrpAddStr) + %x5 = ADRP target-flags(aarch64-page) @g3 + %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0 + STRXui %x2, %x11, 16 + + ; This sequence should just produce an AdrpAdd (not AdrpAddStr) + %x3 = ADRP target-flags(aarch64-page) @g3 + %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0 + STRXui %x3, %x3, 16 + + bb.10: + ; CHECK-NEXT: Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X2<def> = ADRP <ga:@g3> + ; CHECK-NEXT: %X2<def> = LDRXui %X2, <ga:@g3> + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4> + ; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4> + ; CHECK-NEXT: %X1<def> = LDRXui %X1, 24 + %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + %x1 = LDRXui %x1, 24 + ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr) + %x2 = ADRP target-flags(aarch64-page) @g3 + %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 + %x2 = LDRXui %x2, 24 + + bb.11: + ; CHECK-NEXT: Adding MCLOH_AdrpLdr + ; CHECK-NEXT: %X5<def> = ADRP <ga:@g1> + ; CHECK-NEXT: %X5<def> = LDRXui %X5, <ga:@g1> + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr: + ; CHECK-NEXT: %X1<def> = ADRP <ga:@g4> + ; CHECK-NEXT: %X1<def> = LDRXui %X1, <ga:@g4> + ; CHECK-NEXT: STRXui %X4, %X1, 32 + %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + STRXui %x4, %x1, 32 + ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr) + %x5 = ADRP target-flags(aarch64-page) @g1 + %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 + STRXui %x11, %x5, 32 + + bb.12: + successors: %bb.13 + ; Cannot produce a LOH for multiple users + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + B %bb.13 + + 
bb.13: + liveins: %x10 + %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 +... |

