summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2016-09-07 07:36:11 +0000
committerHal Finkel <hfinkel@anl.gov>2016-09-07 07:36:11 +0000
commit42c83f131eff4046ba166b88959ae29ebce0c925 (patch)
treedb21204f5125f13d6672129e1561a927c5366155
parentdb741e7203b9246816bc911e3e5fa7b19ffc0c14 (diff)
downloadbcm5719-llvm-42c83f131eff4046ba166b88959ae29ebce0c925.tar.gz
bcm5719-llvm-42c83f131eff4046ba166b88959ae29ebce0c925.zip
[PowerPC] Fix address-offset folding for plain addi
When folding an addi into a memory access that can take an immediate offset, we were implicitly assuming that the existing offset was zero. This was incorrect. If we're dealing with an addi with a plain constant, we can add it to the existing offset (assuming that doesn't overflow the immediate, etc.), but if we have anything else (i.e. something that will become a relocation expression), we'll go back to requiring the existing immediate offset to be zero (because we don't know what the requirements on that relocation expression might be - e.g. maybe it is paired with some addis in some relevant way). On the other hand, when dealing with a plain addi with a regular constant immediate, the alignment restrictions (from the TOC base pointer, etc.) are irrelevant. I've added the test case from PR30280, which demonstrated the bug, but also demonstrates a missed optimization opportunity (i.e. we don't need the memory accesses at all). Fixes PR30280. llvm-svn: 280789
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp53
-rw-r--r--llvm/test/CodeGen/PowerPC/addi-offset-fold.ll40
2 files changed, 78 insertions, 15 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 56e39221edf..91eb0514ed1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4385,25 +4385,48 @@ void PPCDAGToDAGISel::PeepholePPC64() {
SDValue HBase = Base.getOperand(0);
int Offset = N->getConstantOperandVal(FirstOp);
- if (Offset < 0 || Offset > MaxDisplacement) {
- // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
- // one use, then we can do this for any offset, we just need to also
- // update the offset (i.e. the symbol addend) on the addis also.
- if (Base.getMachineOpcode() != PPC::ADDItocL)
- continue;
+ if (ReplaceFlags) {
+ if (Offset < 0 || Offset > MaxDisplacement) {
+ // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
+ // one use, then we can do this for any offset, we just need to also
+ // update the offset (i.e. the symbol addend) on the addis also.
+ if (Base.getMachineOpcode() != PPC::ADDItocL)
+ continue;
- if (!HBase.isMachineOpcode() ||
- HBase.getMachineOpcode() != PPC::ADDIStocHA)
- continue;
+ if (!HBase.isMachineOpcode() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA)
+ continue;
- if (!Base.hasOneUse() || !HBase.hasOneUse())
- continue;
+ if (!Base.hasOneUse() || !HBase.hasOneUse())
+ continue;
- SDValue HImmOpnd = HBase.getOperand(1);
- if (HImmOpnd != ImmOpnd)
- continue;
+ SDValue HImmOpnd = HBase.getOperand(1);
+ if (HImmOpnd != ImmOpnd)
+ continue;
- UpdateHBase = true;
+ UpdateHBase = true;
+ }
+ } else {
+ // If we're directly folding the addend from an addi instruction, then:
+ // 1. In general, the offset on the memory access must be zero.
+ // 2. If the addend is a constant, then it can be combined with a
+ // non-zero offset, but only if the result meets the encoding
+ // requirements.
+ if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
+ Offset += C->getSExtValue();
+
+ if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) && (Offset % 4) != 0)
+ continue;
+
+ if (!isInt<16>(Offset))
+ continue;
+
+ ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
+ ImmOpnd.getValueType());
+ } else if (Offset != 0) {
+ continue;
+ }
}
// We found an opportunity. Reverse the operands from the add
diff --git a/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll b/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll
new file mode 100644
index 00000000000..063bfdb0d47
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/addi-offset-fold.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.S0 = type <{ i32, [5 x i8] }>
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @foo([2 x i64] %a.coerce) local_unnamed_addr #0 {
+entry:
+ %a = alloca %struct.S0, align 8
+ %a.coerce.fca.0.extract = extractvalue [2 x i64] %a.coerce, 0
+ %a.coerce.fca.1.extract = extractvalue [2 x i64] %a.coerce, 1
+ %a.0.a.0..sroa_cast = bitcast %struct.S0* %a to i64*
+ store i64 %a.coerce.fca.0.extract, i64* %a.0.a.0..sroa_cast, align 8
+ %tmp.sroa.2.0.extract.trunc = trunc i64 %a.coerce.fca.1.extract to i8
+ %a.8.a.8..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1, i64 4
+ store i8 %tmp.sroa.2.0.extract.trunc, i8* %a.8.a.8..sroa_idx, align 8
+ %a.4.a.4..sroa_idx = getelementptr inbounds %struct.S0, %struct.S0* %a, i64 0, i32 1
+ %a.4.a.4..sroa_cast = bitcast [5 x i8]* %a.4.a.4..sroa_idx to i40*
+ %a.4.a.4.bf.load = load i40, i40* %a.4.a.4..sroa_cast, align 4
+ %bf.lshr = lshr i40 %a.4.a.4.bf.load, 31
+ %bf.lshr.tr = trunc i40 %bf.lshr to i32
+ %bf.cast = and i32 %bf.lshr.tr, 127
+ ret i32 %bf.cast
+
+; CHECK-LABEL: @foo
+; FIXME: We don't need to do these stores/loads at all.
+; CHECK-DAG: std 3, -24(1)
+; CHECK-DAG: stb 4, -16(1)
+; CHECK: ori 2, 2, 0
+; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
+; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
+; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
+; CHECK: rldicl 3, [[REG4]], 33, 57
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="ppc64le" }
+
OpenPOWER on IntegriCloud