summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/RISCV
diff options
context:
space:
mode:
author Sameer AbuAsal <sabuasal@codeaurora.org> 2018-06-27 20:51:42 +0000
committer Sameer AbuAsal <sabuasal@codeaurora.org> 2018-06-27 20:51:42 +0000
commit 9b65ffb0976c5b9590959a6439d338fc5ec0dceb (patch)
tree a81461df9ed2a3cc208291ec5d8da0b06f0ce64f /llvm/test/CodeGen/RISCV
parent 8513cd4c0e577a8b2df17b9d0cda5e0b42273668 (diff)
download bcm5719-llvm-9b65ffb0976c5b9590959a6439d338fc5ec0dceb.tar.gz
bcm5719-llvm-9b65ffb0976c5b9590959a6439d338fc5ec0dceb.zip
[RISCV] Add machine function pass to merge base + offset
Summary: In r333455 we added a peephole to fix the corner cases that result from separating base + offset lowering of a global address. The peephole didn't handle some of the cases because it only has a basic-block view instead of a function-level view. This patch replaces that logic with a machine function pass. In addition to handling the original cases, it handles uses of the global address across blocks in a function and folding an offset from an LW/SW instruction. This pass won't run for OptNone compilation, so there will be a negative impact overall vs the old approach at O0. Reviewers: asb, apazos, mgrang Reviewed By: asb Subscribers: MartinMosbeck, brucehoult, the_o, rogfer01, mgorny, rbar, johnrusso, simoncook, niosHD, kito-cheng, shiva0217, zzheng, llvm-commits, edward-jones Differential Revision: https://reviews.llvm.org/D47857 llvm-svn: 335786
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r-- llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll 75
1 files changed, 38 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 1eaf3034906..2a1d5ed1a08 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -49,6 +49,23 @@ if.end: ; preds = %if.then, %entry
ret void
}
+; This test checks that the offset is reconstructed correctly when
+; "addi" of the big offset has a negative immediate.
+; without peephole this generates:
+; lui a1, %hi(g)
+; addi a1, a1, %lo(g)
+; lui a0, 18 ---> offset
+; addi a0, a0, -160
+; add a0, a0, a1 ---> base + offset.
+define i8* @big_offset_neg_addi() {
+; CHECK-LABEL: big_offset_neg_addi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(g+73568)
+; CHECK-NEXT: addi a0, a0, %lo(g+73568)
+; CHECK-NEXT: ret
+ ret i8* getelementptr inbounds ([1048576 x i8], [1048576 x i8]* @g, i32 0, i32 73568)
+}
+
; This test checks for the case where the offset is only an LUI.
; without peephole this generates:
; lui a0, %hi(g)
@@ -84,36 +101,19 @@ entry:
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1)
}
-; TODO: In this case we get a better sequence if the offset didn't get didn't
-; get merged back in %if.end and %if.then. The current peephole is not able to
-; detect the shared global address node across blocks.
-; Without the peephole we can generate:
-;# %bb.0: # %entry
-; lui a0, %hi(s)
-; addi a0, a0, %lo(s)
-; lw a1, 164(a0)
-; beqz a1, .LBB0_2
-;# %bb.1: # %if.end
-; addi a0, a0, 168
-; ret
-;.LBB0_2: # %if.then
-; addi a0, a0, 160
-; ret
; Function Attrs: norecurse nounwind optsize readonly
define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr #1 {
; CHECK-LABEL: control_flow_no_mem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a0, %hi(s)
; CHECK-NEXT: addi a0, a0, %lo(s)
-; CHECK-NEXT: lw a0, 164(a0)
-; CHECK-NEXT: beqz a0, .LBB5_2
+; CHECK-NEXT: lw a1, 164(a0)
+; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.end
-; CHECK-NEXT: lui a0, %hi(s+168)
-; CHECK-NEXT: addi a0, a0, %lo(s+168)
+; CHECK-NEXT: addi a0, a0, 168
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_2: # %if.then
-; CHECK-NEXT: lui a0, %hi(s+160)
-; CHECK-NEXT: addi a0, a0, %lo(s+160)
+; CHECK-NEXT: .LBB6_2: # %if.then
+; CHECK-NEXT: addi a0, a0, 160
; CHECK-NEXT: ret
entry:
%0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4
@@ -125,32 +125,21 @@ if.end: ; preds = %if.then, %entry
ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 3)
}
-;TODO: Offset shouln't be separated in this case. We get shorter sequence if it
-; is merged in the LUI %hi and the ADDI %lo, the "ADDI" could be folded in the
-; immediate part of "lhu" genertating the sequence:
-; lui a0, %hi(foo +8)
-; lhu a0, %lo(foo+8)(a0)
-; instead of:
-; lui a0, %hi(foo)
-; addi a0, a0, %lo(foo)
-; lhu a0, 8(a0)
-
define dso_local i32 @load_half() nounwind {
; CHECK-LABEL: load_half:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp)
-; CHECK-NEXT: lui a0, %hi(foo)
-; CHECK-NEXT: addi a0, a0, %lo(foo)
-; CHECK-NEXT: lhu a0, 8(a0)
+; CHECK-NEXT: lui a0, %hi(foo+8)
+; CHECK-NEXT: lhu a0, %lo(foo+8)(a0)
; CHECK-NEXT: addi a1, zero, 140
-; CHECK-NEXT: bne a0, a1, .LBB6_2
+; CHECK-NEXT: bne a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: mv a0, zero
; CHECK-NEXT: lw ra, 12(sp)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB6_2: # %if.then
+; CHECK-NEXT: .LBB7_2: # %if.then
; CHECK-NEXT: call abort
entry:
%0 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @foo, i32 0, i32 4), align 2
@@ -166,3 +155,15 @@ if.end:
}
declare void @abort()
+
+define dso_local void @one_store() local_unnamed_addr {
+; CHECK-LABEL: one_store:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %hi(s+160)
+; CHECK-NEXT: addi a1, zero, 10
+; CHECK-NEXT: sw a1, %lo(s+160)(a0)
+; CHECK-NEXT: ret
+entry:
+ store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4
+ ret void
+}
OpenPOWER on IntegriCloud