summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp13
-rw-r--r--llvm/test/CodeGen/PowerPC/preincprep-i64-check.ll124
2 files changed, 137 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index d16397b23ed..2217fa4693c 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -294,6 +294,19 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
if (LARSCEV->getLoop() != L)
continue;
+ // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
+ // be 4's multiple (DS-form). For i64 loads/stores when the displacement
+ // fits in a 16-bit signed field but isn't a multiple of 4, it will be
+ // useless and possible to break some original well-form addressing mode
+ // to make this pre-inc prep for it.
+ if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
+ if (const SCEVConstant *StepConst =
+ dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
+ const APInt &ConstInt = StepConst->getValue()->getValue();
+ if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
+ continue;
+ }
+ }
} else {
continue;
}
diff --git a/llvm/test/CodeGen/PowerPC/preincprep-i64-check.ll b/llvm/test/CodeGen/PowerPC/preincprep-i64-check.ll
new file mode 100644
index 00000000000..64166a02aa8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/preincprep-i64-check.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
+
+; Verify pre-inc preparation pass doesn't prepare pre-inc for i64 load/store
+; when the stride doesn't conform LDU/STDU DS-form requirement.
+
+@result = local_unnamed_addr global i64 0, align 8
+
+define i64 @test_preinc_i64_ld(i8* nocapture readonly, i64) local_unnamed_addr {
+ %3 = icmp eq i64 %1, 0
+ br i1 %3, label %4, label %6
+
+; <label>:4: ; preds = %2
+ %5 = load i64, i64* @result, align 8
+ br label %13
+
+; <label>:6: ; preds = %2
+ %7 = getelementptr inbounds i8, i8* %0, i64 -50000
+ %8 = getelementptr inbounds i8, i8* %0, i64 -61024
+ %9 = getelementptr inbounds i8, i8* %0, i64 -62048
+ %10 = getelementptr inbounds i8, i8* %0, i64 -64096
+ %11 = load i64, i64* @result, align 8
+ br label %15
+
+; <label>:12: ; preds = %15
+ store i64 %33, i64* @result, align 8
+ br label %13
+
+; <label>:13: ; preds = %12, %4
+ %14 = phi i64 [ %5, %4 ], [ %33, %12 ]
+ ret i64 %14
+
+; <label>:15: ; preds = %15, %6
+ %16 = phi i64 [ %11, %6 ], [ %33, %15 ]
+ %17 = phi i64 [ 0, %6 ], [ %34, %15 ]
+ %18 = getelementptr inbounds i8, i8* %7, i64 %17
+ %19 = bitcast i8* %18 to i64*
+ %20 = load i64, i64* %19, align 8
+ %21 = getelementptr inbounds i8, i8* %8, i64 %17
+ %22 = bitcast i8* %21 to i64*
+ %23 = load i64, i64* %22, align 8
+ %24 = getelementptr inbounds i8, i8* %9, i64 %17
+ %25 = bitcast i8* %24 to i64*
+ %26 = load i64, i64* %25, align 8
+ %27 = getelementptr inbounds i8, i8* %10, i64 %17
+ %28 = bitcast i8* %27 to i64*
+ %29 = load i64, i64* %28, align 8
+ %30 = mul i64 %23, %20
+ %31 = mul i64 %30, %26
+ %32 = mul i64 %31, %29
+ %33 = mul i64 %32, %16
+ %34 = add nuw i64 %17, 1
+ %35 = icmp eq i64 %34, %1
+ br i1 %35, label %12, label %15
+}
+
+; CHECK-LABEL: test_preinc_i64_ld
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -11023
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -12047
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -14095
+; CHECK-DAG: ld {{[0-9]+}}, 14096([[REG1:[0-9]+]])
+; CHECK-DAG: ld {{[0-9]+}}, 3072([[REG1]])
+; CHECK-DAG: ld {{[0-9]+}}, 2048([[REG1]])
+; CHECK-DAG: ld {{[0-9]+}}, 0([[REG1]])
+; CHECK: blr
+
+define i64 @test_preinc_i64_ldst(i8* nocapture, i64, i64) local_unnamed_addr {
+ %4 = icmp eq i64 %1, 0
+ br i1 %4, label %5, label %7
+
+; <label>:5: ; preds = %3
+ %6 = load i64, i64* @result, align 8
+ br label %16
+
+; <label>:7: ; preds = %3
+ %8 = add i64 %2, 1
+ %9 = getelementptr inbounds i8, i8* %0, i64 -1024
+ %10 = add i64 %2, 2
+ %11 = getelementptr inbounds i8, i8* %0, i64 -2048
+ %12 = getelementptr inbounds i8, i8* %0, i64 -3072
+ %13 = getelementptr inbounds i8, i8* %0, i64 -4096
+ %14 = load i64, i64* @result, align 8
+ br label %18
+
+; <label>:15: ; preds = %18
+ store i64 %32, i64* @result, align 8
+ br label %16
+
+; <label>:16: ; preds = %15, %5
+ %17 = phi i64 [ %6, %5 ], [ %32, %15 ]
+ ret i64 %17
+
+; <label>:18: ; preds = %18, %7
+ %19 = phi i64 [ %14, %7 ], [ %32, %18 ]
+ %20 = phi i64 [ 0, %7 ], [ %33, %18 ]
+ %21 = getelementptr inbounds i8, i8* %9, i64 %20
+ %22 = bitcast i8* %21 to i64*
+ store i64 %8, i64* %22, align 8
+ %23 = getelementptr inbounds i8, i8* %11, i64 %20
+ %24 = bitcast i8* %23 to i64*
+ store i64 %10, i64* %24, align 8
+ %25 = getelementptr inbounds i8, i8* %12, i64 %20
+ %26 = bitcast i8* %25 to i64*
+ %27 = load i64, i64* %26, align 8
+ %28 = getelementptr inbounds i8, i8* %13, i64 %20
+ %29 = bitcast i8* %28 to i64*
+ %30 = load i64, i64* %29, align 8
+ %31 = mul i64 %30, %27
+ %32 = mul i64 %31, %19
+ %33 = add nuw i64 %20, 1
+ %34 = icmp eq i64 %33, %1
+ br i1 %34, label %15, label %18
+}
+
+; CHECK-LABEL: test_preinc_i64_ldst
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -3071
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -2047
+; CHECK-NOT: addi {{[0-9]+}}, {{[0-9]+}}, -1023
+; CHECK-DAG: ld {{[0-9]+}}, -1024([[REG2:[0-9]+]])
+; CHECK-DAG: ld {{[0-9]+}}, -2048([[REG2]])
+; CHECK-DAG: std {{[0-9]+}}, 1024([[REG2]])
+; CHECK-DAG: std {{[0-9]+}}, 0([[REG2]])
OpenPOWER on IntegriCloud