diff options
| author | Sameer AbuAsal <sabuasal@codeaurora.org> | 2018-06-27 20:51:42 +0000 |
|---|---|---|
| committer | Sameer AbuAsal <sabuasal@codeaurora.org> | 2018-06-27 20:51:42 +0000 |
| commit | 9b65ffb0976c5b9590959a6439d338fc5ec0dceb (patch) | |
| tree | a81461df9ed2a3cc208291ec5d8da0b06f0ce64f /llvm/test/CodeGen/RISCV | |
| parent | 8513cd4c0e577a8b2df17b9d0cda5e0b42273668 (diff) | |
| download | bcm5719-llvm-9b65ffb0976c5b9590959a6439d338fc5ec0dceb.tar.gz bcm5719-llvm-9b65ffb0976c5b9590959a6439d338fc5ec0dceb.zip | |
[RISCV] Add machine function pass to merge base + offset
Summary:
In r333455 we added a peephole to fix the corner cases that result
from separating base + offset lowering of global address. The
peephole didn't handle some of the cases because it only has a basic
block view instead of a function level view.
This patch replaces that logic with a machine function pass. In
addition to handling the original cases it handles uses of the global
address across blocks in a function and folding an offset from LW/SW
instruction. This pass won't run for OptNone compilation, so there
will be a negative impact overall vs the old approach at O0.
Reviewers: asb, apazos, mgrang
Reviewed By: asb
Subscribers: MartinMosbeck, brucehoult, the_o, rogfer01, mgorny, rbar, johnrusso, simoncook, niosHD, kito-cheng, shiva0217, zzheng, llvm-commits, edward-jones
Differential Revision: https://reviews.llvm.org/D47857
llvm-svn: 335786
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
| -rw-r--r-- | llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll | 75 |
1 files changed, 38 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll index 1eaf3034906..2a1d5ed1a08 100644 --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -49,6 +49,23 @@ if.end: ; preds = %if.then, %entry ret void } +; This test checks that the offset is reconstructed correctly when +; "addi" of the big offset has a negative immediate. +; without peephole this generates: +; lui a1, %hi(g) +; addi a1, a0, %lo(g) +; lui a0, 18 ---> offset +; addi a0, a0, -160 +; add a0, a0, a1 ---> base + offset. +define i8* @big_offset_neg_addi() { +; CHECK-LABEL: big_offset_neg_addi: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(g+73568) +; CHECK-NEXT: addi a0, a0, %lo(g+73568) +; CHECK-NEXT: ret + ret i8* getelementptr inbounds ([1048576 x i8], [1048576 x i8]* @g, i32 0, i32 73568) +} + ; This test checks for the case where the offset is only an LUI. ; without peephole this generates: ; lui a0, %hi(g) @@ -84,36 +101,19 @@ entry: ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1) } -; TODO: In this case we get a better sequence if the offset didn't get didn't -; get merged back in %if.end and %if.then. The current peephole is not able to -; detect the shared global address node across blocks. 
-; Without the peephole we can generate: -;# %bb.0: # %entry -; lui a0, %hi(s) -; addi a0, a0, %lo(s) -; lw a1, 164(a0) -; beqz a1, .LBB0_2 -;# %bb.1: # %if.end -; addi a0, a0, 168 -; ret -;.LBB0_2: # %if.then -; addi a0, a0, 160 -; ret ; Function Attrs: norecurse nounwind optsize readonly define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr #1 { ; CHECK-LABEL: control_flow_no_mem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a0, %hi(s) ; CHECK-NEXT: addi a0, a0, %lo(s) -; CHECK-NEXT: lw a0, 164(a0) -; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: lw a1, 164(a0) +; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: lui a0, %hi(s+168) -; CHECK-NEXT: addi a0, a0, %lo(s+168) +; CHECK-NEXT: addi a0, a0, 168 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: # %if.then -; CHECK-NEXT: lui a0, %hi(s+160) -; CHECK-NEXT: addi a0, a0, %lo(s+160) +; CHECK-NEXT: .LBB6_2: # %if.then +; CHECK-NEXT: addi a0, a0, 160 ; CHECK-NEXT: ret entry: %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4 @@ -125,32 +125,21 @@ if.end: ; preds = %if.then, %entry ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 3) } -;TODO: Offset shouln't be separated in this case. 
We get shorter sequence if it -; is merged in the LUI %hi and the ADDI %lo, the "ADDI" could be folded in the -; immediate part of "lhu" genertating the sequence: -; lui a0, %hi(foo +8) -; lhu a0, %lo(foo+8)(a0) -; instead of: -; lui a0, %hi(foo) -; addi a0, a0, %lo(foo) -; lhu a0, 8(a0) - define dso_local i32 @load_half() nounwind { ; CHECK-LABEL: load_half: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sw ra, 12(sp) -; CHECK-NEXT: lui a0, %hi(foo) -; CHECK-NEXT: addi a0, a0, %lo(foo) -; CHECK-NEXT: lhu a0, 8(a0) +; CHECK-NEXT: lui a0, %hi(foo+8) +; CHECK-NEXT: lhu a0, %lo(foo+8)(a0) ; CHECK-NEXT: addi a1, zero, 140 -; CHECK-NEXT: bne a0, a1, .LBB6_2 +; CHECK-NEXT: bne a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: mv a0, zero ; CHECK-NEXT: lw ra, 12(sp) ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB6_2: # %if.then +; CHECK-NEXT: .LBB7_2: # %if.then ; CHECK-NEXT: call abort entry: %0 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @foo, i32 0, i32 4), align 2 @@ -166,3 +155,15 @@ if.end: } declare void @abort() + +define dso_local void @one_store() local_unnamed_addr { +; CHECK-LABEL: one_store: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s+160) +; CHECK-NEXT: addi a1, zero, 10 +; CHECK-NEXT: sw a1, %lo(s+160)(a0) +; CHECK-NEXT: ret +entry: + store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + ret void +} |

