summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRong Xu <xur@google.com>2019-10-10 21:30:43 +0000
committerRong Xu <xur@google.com>2019-10-10 21:30:43 +0000
commit686fa4bbfbce034484fee8616d9ec7c29ed96410 (patch)
treefc74f31061b9df30f4b6ec5cfc362aaca083c8e8
parent8bd42769816ad339bf64ae5ef72078739ed488a9 (diff)
downloadbcm5719-llvm-686fa4bbfbce034484fee8616d9ec7c29ed96410.tar.gz
bcm5719-llvm-686fa4bbfbce034484fee8616d9ec7c29ed96410.zip
[ValueTracking] Improve pointer offset computation for cases of same base
This patch improves the handling of pointer offsets in GEP expressions where one argument is the base pointer. isPointerOffset() is used by memcpyopt, where the current code turns 32 consecutive byte stores into one store and two memset intrinsic calls. With this patch, the stores are converted to a single memset intrinsic. Differential Revision: https://reviews.llvm.org/D67989 llvm-svn: 374454
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp48
-rw-r--r--llvm/test/Transforms/MemCpyOpt/store-to-memset.ll77
2 files changed, 116 insertions, 9 deletions
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index f0b87810ef9..aaebba00527 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5755,17 +5755,47 @@ Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
- // If one pointer is a GEP and the other isn't, then see if the GEP is a
- // constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
- auto Offset = getOffsetFromIndex(GEP1, 1, DL);
- if (!Offset)
+ // If one pointer is a GEP see if the GEP is a constant offset from the base,
+ // as in "P" and "gep P, 1".
+ // Also do this iteratively to handle the following case:
+ // Ptr_t1 = GEP Ptr1, c1
+ // Ptr_t2 = GEP Ptr_t1, c2
+ // Ptr2 = GEP Ptr_t2, c3
+ // where we will return c1+c2+c3.
+ // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base
+ // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases
+ // are the same, and return the difference between offsets.
+ auto getOffsetFromBase = [&DL](const GEPOperator *GEP,
+ const Value *Ptr) -> Optional<int64_t> {
+ const GEPOperator *GEP_T = GEP;
+ int64_t OffsetVal = 0;
+ bool HasSameBase = false;
+ while (GEP_T) {
+ auto Offset = getOffsetFromIndex(GEP_T, 1, DL);
+ if (!Offset)
+ return None;
+ OffsetVal += *Offset;
+ auto Op0 = GEP_T->getOperand(0)->stripPointerCasts();
+ if (Op0 == Ptr) {
+ HasSameBase = true;
+ break;
+ }
+ GEP_T = dyn_cast<GEPOperator>(Op0);
+ }
+ if (!HasSameBase)
return None;
- return -*Offset;
- }
+ return OffsetVal;
+ };
- if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
- return getOffsetFromIndex(GEP2, 1, DL);
+ if (GEP1) {
+ auto Offset = getOffsetFromBase(GEP1, Ptr2);
+ if (Offset)
+ return -*Offset;
+ }
+ if (GEP2) {
+ auto Offset = getOffsetFromBase(GEP2, Ptr1);
+ if (Offset)
+ return Offset;
}
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll
new file mode 100644
index 00000000000..59ed892b60e
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+define i8* @foo(i8* returned %0, i32 %1, i64 %2) {
+entry:
+ %3 = getelementptr inbounds i8, i8* %0, i64 %2
+ %4 = getelementptr inbounds i8, i8* %3, i64 -32
+ %vv = trunc i32 %1 to i8
+ store i8 %vv, i8* %4, align 1
+ %5 = getelementptr inbounds i8, i8* %4, i64 1
+ store i8 %vv, i8* %5, align 1
+ %6= getelementptr inbounds i8, i8* %4, i64 2
+ store i8 %vv, i8* %6, align 1
+ %7= getelementptr inbounds i8, i8* %4, i64 3
+ store i8 %vv, i8* %7, align 1
+ %8= getelementptr inbounds i8, i8* %4, i64 4
+ store i8 %vv, i8* %8, align 1
+ %9= getelementptr inbounds i8, i8* %4, i64 5
+ store i8 %vv, i8* %9, align 1
+ %10= getelementptr inbounds i8, i8* %4, i64 6
+ store i8 %vv, i8* %10, align 1
+ %11= getelementptr inbounds i8, i8* %4, i64 7
+ store i8 %vv, i8* %11, align 1
+ %12= getelementptr inbounds i8, i8* %4, i64 8
+ store i8 %vv, i8* %12, align 1
+ %13= getelementptr inbounds i8, i8* %4, i64 9
+ store i8 %vv, i8* %13, align 1
+ %14= getelementptr inbounds i8, i8* %4, i64 10
+ store i8 %vv, i8* %14, align 1
+ %15= getelementptr inbounds i8, i8* %4, i64 11
+ store i8 %vv, i8* %15, align 1
+ %16= getelementptr inbounds i8, i8* %4, i64 12
+ store i8 %vv, i8* %16, align 1
+ %17= getelementptr inbounds i8, i8* %4, i64 13
+ store i8 %vv, i8* %17, align 1
+ %18= getelementptr inbounds i8, i8* %4, i64 14
+ store i8 %vv, i8* %18, align 1
+ %19= getelementptr inbounds i8, i8* %4, i64 15
+ store i8 %vv, i8* %19, align 1
+ %20= getelementptr inbounds i8, i8* %4, i64 16
+ store i8 %vv, i8* %20, align 1
+ %21= getelementptr inbounds i8, i8* %20, i64 1
+ store i8 %vv, i8* %21, align 1
+ %22= getelementptr inbounds i8, i8* %20, i64 2
+ store i8 %vv, i8* %22, align 1
+ %23= getelementptr inbounds i8, i8* %20, i64 3
+ store i8 %vv, i8* %23, align 1
+ %24= getelementptr inbounds i8, i8* %20, i64 4
+ store i8 %vv, i8* %24, align 1
+ %25= getelementptr inbounds i8, i8* %20, i64 5
+ store i8 %vv, i8* %25, align 1
+ %26= getelementptr inbounds i8, i8* %20, i64 6
+ store i8 %vv, i8* %26, align 1
+ %27= getelementptr inbounds i8, i8* %20, i64 7
+ store i8 %vv, i8* %27, align 1
+ %28= getelementptr inbounds i8, i8* %20, i64 8
+ store i8 %vv, i8* %28, align 1
+ %29= getelementptr inbounds i8, i8* %20, i64 9
+ store i8 %vv, i8* %29, align 1
+ %30= getelementptr inbounds i8, i8* %20, i64 10
+ store i8 %vv, i8* %30, align 1
+ %31 = getelementptr inbounds i8, i8* %20, i64 11
+ store i8 %vv, i8* %31, align 1
+ %32 = getelementptr inbounds i8, i8* %20, i64 12
+ store i8 %vv, i8* %32, align 1
+ %33 = getelementptr inbounds i8, i8* %20, i64 13
+ store i8 %vv, i8* %33, align 1
+ %34 = getelementptr inbounds i8, i8* %20, i64 14
+ store i8 %vv, i8* %34, align 1
+ %35 = getelementptr inbounds i8, i8* %20, i64 15
+ store i8 %vv, i8* %35, align 1
+ ret i8* %0
+; CHECK-LABEL: @foo
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %4, i8 %vv, i64 32, i1 false)
+}
+
OpenPOWER on IntegriCloud