diff options
author | Hal Finkel <hfinkel@anl.gov> | 2016-09-07 07:36:11 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2016-09-07 07:36:11 +0000 |
commit | 42c83f131eff4046ba166b88959ae29ebce0c925 (patch) | |
tree | db21204f5125f13d6672129e1561a927c5366155 /llvm/test/CodeGen/PowerPC/addi-offset-fold.ll | |
parent | db741e7203b9246816bc911e3e5fa7b19ffc0c14 (diff) | |
download | bcm5719-llvm-42c83f131eff4046ba166b88959ae29ebce0c925.tar.gz bcm5719-llvm-42c83f131eff4046ba166b88959ae29ebce0c925.zip |
[PowerPC] Fix address-offset folding for plain addi
When folding an addi into a memory access that can take an immediate offset, we
were implicitly assuming that the existing offset was zero. This was incorrect.
If we're dealing with an addi with a plain constant, we can add it to the
existing offset (assuming that doesn't overflow the immediate, etc.), but if we
have anything else (i.e. something that will become a relocation expression),
we'll go back to requiring the existing immediate offset to be zero (because we
don't know what the requirements on that relocation expression might be - e.g.
maybe it is paired with some addis in some relevant way).
On the other hand, when dealing with a plain addi with a regular constant
immediate, the alignment restrictions (from the TOC base pointer, etc.) are
irrelevant.
I've added the test case from PR30280, which demonstrated the bug, but also
demonstrates a missed optimization opportunity (i.e. we don't need the memory
accesses at all).
Fixes PR30280.
llvm-svn: 280789
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/addi-offset-fold.ll')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/addi-offset-fold.ll | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll b/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll
new file mode 100644
index 00000000000..063bfdb0d47
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.S0 = type <{ i32, [5 x i8] }>  ; packed: i32 at byte 0, [5 x i8] at bytes 4-8
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @foo([2 x i64] %a.coerce) local_unnamed_addr #0 {
+entry:
+  %a = alloca %struct.S0, align 8
+  %a.coerce.fca.0.extract = extractvalue [2 x i64] %a.coerce, 0  ; first i64 of the argument
+  %a.coerce.fca.1.extract = extractvalue [2 x i64] %a.coerce, 1  ; second i64 of the argument
+  %a.0.a.0..sroa_cast = bitcast %struct.S0* %a to i64*
+  store i64 %a.coerce.fca.0.extract, i64* %a.0.a.0..sroa_cast, align 8  ; writes bytes 0-7 of %a
+  %tmp.sroa.2.0.extract.trunc = trunc i64 %a.coerce.fca.1.extract to i8  ; low 8 bits of the second i64
+  %a.8.a.8..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1, i64 4  ; field 1, element 4 = byte 8 of %a
+  store i8 %tmp.sroa.2.0.extract.trunc, i8* %a.8.a.8..sroa_idx, align 8  ; writes byte 8 of %a
+  %a.4.a.4..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1  ; field 1 = byte 4 of %a
+  %a.4.a.4..sroa_cast = bitcast [5 x i8]* %a.4.a.4..sroa_idx to i40*
+  %a.4.a.4.bf.load = load i40, i40* %a.4.a.4..sroa_cast, align 4  ; reads bytes 4-8 back as one i40 (overlaps both stores)
+  %bf.lshr = lshr i40 %a.4.a.4.bf.load, 31
+  %bf.lshr.tr = trunc i40 %bf.lshr to i32
+  %bf.cast = and i32 %bf.lshr.tr, 127  ; keeps 7 bits: bits 31-37 of the loaded i40
+  ret i32 %bf.cast
+
+; CHECK-LABEL: @foo
+; FIXME: We don't need to do these stores/loads at all.
+; CHECK-DAG: std 3, -24(1)
+; CHECK-DAG: stb 4, -16(1)
+; CHECK: ori 2, 2, 0
+; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
+; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
+; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
+; CHECK: rldicl 3, [[REG4]], 33, 57
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="ppc64le" }
+
|