diff options
| author | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
|---|---|---|
| committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
| commit | cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch) | |
| tree | d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/LoopLoadElim | |
| parent | c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff) | |
| download | bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip | |
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory.
Will be re-reverting again.
llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/LoopLoadElim')
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/backward.ll | 33 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/cond-load.ll | 42 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/def-store-before-load.ll | 35 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/forward.ll | 48 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/loop-simplify-dep.ll | 33 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/memcheck.ll | 52 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll | 48 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/non-consecutive.ll | 43 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/opt-size.ll | 129 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll | 92 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/type-mismatch.ll | 89 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopLoadElim/unknown-dep.ll | 54 |
12 files changed, 698 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopLoadElim/backward.ll b/llvm/test/Transforms/LoopLoadElim/backward.ll new file mode 100644 index 00000000000..c0cec75bdd3 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/backward.ll @@ -0,0 +1,33 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s + +; Simple st->ld forwarding derived from a lexical backward dep. +; +; for (unsigned i = 0; i < 100; i++) +; A[i+1] = A[i] + B[i]; + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) { +entry: +; CHECK: %load_initial = load i32, i32* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %load = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %load_1 = load i32, i32* %arrayidx2, align 4 +; CHECK: %add = add i32 %load_1, %store_forwarded + %add = add i32 %load_1, %load + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + store i32 %add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/cond-load.ll b/llvm/test/Transforms/LoopLoadElim/cond-load.ll new file mode 100644 index 00000000000..e337397e996 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/cond-load.ll @@ -0,0 +1,42 @@ +; RUN: opt -S -loop-load-elim < %s | FileCheck %s + +; We can't hoist conditional loads to the preheader for the initial value. +; E.g. 
in the loop below we'd access array[-1] if we did: +; +; for(int i = 0 ; i < n ; i++ ) +; array[i] = ( i > 0 ? array[i - 1] : 0 ) + 4; + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @f(i32* %array, i32 %n) { +entry: + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %cond.end, %entry + ret void + +for.body: ; preds = %entry, %cond.end + %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ] +; CHECK-NOT: %store_forwarded = phi + %cmp1 = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp1, label %cond.true, label %cond.end + +cond.true: ; preds = %for.body + %0 = add nsw i64 %indvars.iv, -1 + %arrayidx = getelementptr inbounds i32, i32* %array, i64 %0 + %1 = load i32, i32* %arrayidx, align 4 + br label %cond.end + +cond.end: ; preds = %for.body, %cond.true + %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ] +; CHECK: %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ] + %add = add nsw i32 %cond, 4 + %arrayidx3 = getelementptr inbounds i32, i32* %array, i64 %indvars.iv + store i32 %add, i32* %arrayidx3, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/LoopLoadElim/def-store-before-load.ll b/llvm/test/Transforms/LoopLoadElim/def-store-before-load.ll new file mode 100644 index 00000000000..3dc93f6786e --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/def-store-before-load.ll @@ -0,0 +1,35 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s + +; No loop-carried forwarding: The intervening store to A[i] kills the stored +; value from the previous iteration. 
+; +; for (unsigned i = 0; i < 100; i++) { +; A[i] = 1; +; A[i+1] = A[i] + B[i]; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry +; CHECK-NOT: %store_forwarded + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 1, i32* %arrayidx, align 4 + %a = load i32, i32* %arrayidx, align 4 + %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %b = load i32, i32* %arrayidxB, align 4 +; CHECK: %add = add i32 %b, %a + %add = add i32 %b, %a + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + store i32 %add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/forward.ll b/llvm/test/Transforms/LoopLoadElim/forward.ll new file mode 100644 index 00000000000..0b270cab3ed --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/forward.ll @@ -0,0 +1,48 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s + +; Simple st->ld forwarding derived from a lexical forward dep. 
+; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; C[i] = A[i] * 2; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B, i32* %C, i64 %N) { + +; CHECK: for.body.lver.check: +; CHECK: %found.conflict{{.*}} = +; CHECK-NOT: %found.conflict{{.*}} = + +entry: +; Make sure the hoisted load keeps the alignment +; CHECK: %load_initial = load i32, i32* %A, align 1 + br label %for.body + +for.body: ; preds = %for.body, %entry +; CHECK: %store_forwarded = phi i32 [ %load_initial, %for.body.ph ], [ %a_p1, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 1 +; CHECK: %c = mul i32 %store_forwarded, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/loop-simplify-dep.ll b/llvm/test/Transforms/LoopLoadElim/loop-simplify-dep.ll new file mode 100644 index 00000000000..f6bfe96d9c9 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/loop-simplify-dep.ll @@ -0,0 +1,33 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s + +; Make sure we create a preheader if we don't have one. 
+ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, i1 %C) { +entry: + br i1 %C, label %for.body, label %for.end + +; CHECK: for.body.preheader: +; CHECK-NEXT: %load_initial = load i32, i32* %A +; CHECK-NEXT: br label %for.body + +; CHECK: for.body: +for.body: +; CHECK-NEXT: %store_forwarded = phi i32 [ %load_initial, %for.body.preheader ], [ %add, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %load = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %load_1 = load i32, i32* %arrayidx2, align 4 +; CHECK: %add = add i32 %load_1, %store_forwarded + %add = add i32 %load_1, %load + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + store i32 %add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/memcheck.ll b/llvm/test/Transforms/LoopLoadElim/memcheck.ll new file mode 100644 index 00000000000..8eadd437a5a --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/memcheck.ll @@ -0,0 +1,52 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -loop-load-elim -S -runtime-check-per-loop-load-elim=2 < %s | FileCheck %s --check-prefix=AGGRESSIVE + +; This needs two pairs of memchecks (A * { C, D }) for a single load +; elimination which is considered too expensive by default. 
+; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; C[i] = A[i] * 2; +; D[i] = 2; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B, i32* %C, i64 %N, i32* %D) { +entry: + br label %for.body + +; AGGRESSIVE: for.body.lver.check: +; AGGRESSIVE: %found.conflict{{.*}} = +; AGGRESSIVE: %found.conflict{{.*}} = +; AGGRESSIVE-NOT: %found.conflict{{.*}} = + +for.body: ; preds = %for.body, %entry +; CHECK-NOT: %store_forwarded = +; AGGRESSIVE: %store_forwarded = + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 4 +; CHECK: %c = mul i32 %a, 2 +; AGGRESSIVE: %c = mul i32 %store_forwarded, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + store i32 2, i32* %Didx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll b/llvm/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll new file mode 100644 index 00000000000..b0c0f3dee86 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll @@ -0,0 +1,48 @@ +; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s + +; In this case the later store forwards to the load: +; +; for (unsigned i = 0; i < 100; i++) { +; B[i] = A[i] + 1; +; A[i+1] = C[i] + 2; +; A[i+1] = D[i] + 3; +; } + +target datalayout = 
"e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, + i32* noalias nocapture %C, i32* noalias nocapture readonly %D, + i64 %N) { +entry: +; CHECK: %load_initial = load i32, i32* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %addD, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidxA = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %loadA = load i32, i32* %arrayidxA, align 4 +; CHECK: %addA = add i32 %store_forwarded, 1 + %addA = add i32 %loadA, 1 + + %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %addA, i32* %arrayidxB, align 4 + + %arrayidxC = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %loadC = load i32, i32* %arrayidxC, align 4 + %addC = add i32 %loadC, 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidxA_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + store i32 %addC, i32* %arrayidxA_next, align 4 + + %arrayidxD = getelementptr inbounds i32, i32* %D, i64 %indvars.iv + %loadD = load i32, i32* %arrayidxD, align 4 + %addD = add i32 %loadD, 3 + store i32 %addD, i32* %arrayidxA_next, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/non-consecutive.ll b/llvm/test/Transforms/LoopLoadElim/non-consecutive.ll new file mode 100644 index 00000000000..43751a8ff60 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/non-consecutive.ll @@ -0,0 +1,43 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s + +; The accesses to A are independent here but LAA reports it as a loop-carried +; forward dependence. Check that we don't perform st->ld forwarding between +; them. 
+; +; for (unsigned i = 0; i < 100; i++) { +; A[i][1] = B[i] + 2; +; C[i] = A[i][0] * 2; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @f([2 x i32]* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %A1idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 1 + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %A0idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 0 + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %A1idx, align 4 + +; CHECK: %a = load i32, i32* %A0idx, align 4 + %a = load i32, i32* %A0idx, align 4 +; CHECK: %c = mul i32 %a, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/opt-size.ll b/llvm/test/Transforms/LoopLoadElim/opt-size.ll new file mode 100644 index 00000000000..f9d82fec2a2 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/opt-size.ll @@ -0,0 +1,129 @@ +; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO +; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO + +; When optimizing for size don't eliminate in this loop because the loop would +; have to be versioned first because A and C may alias. 
+; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; C[i] = A[i] * 2; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @f( +define void @f(i32* %A, i32* %B, i32* %C, i64 %N) optsize { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 4 +; CHECK: %c = mul i32 %a, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Same loop but with noalias on %A and %C. In this case load-eliminate even +; with -Os. 
+ +; CHECK-LABEL: @g( +define void @g(i32* noalias %A, i32* %B, i32* noalias %C, i64 %N) optsize { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 4 +; CHECK: %c = mul i32 %store_forwarded, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + + +; PGSO-LABEL: @f_pgso( +; NPGSO-LABEL: @f_pgso( +define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %a = load i32, i32* %Aidx, align 4 +; PGSO: %c = mul i32 %a, 2 +; NPGSO-NOT: %c = mul i32 %a, 2 + %c = mul i32 %a, 2 + store i32 %c, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, 
!"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll new file mode 100644 index 00000000000..7a2d1b6c7e3 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll @@ -0,0 +1,92 @@ +; RUN: opt -loop-load-elim -S < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \ +; RUN: -check-prefix=TWO_STRIDE_SPEC + +; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \ +; RUN: -check-prefix=NO_TWO_STRIDE_SPEC + +; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \ +; RUN: -check-prefix=NO_TWO_STRIDE_SPEC + +; Forwarding in the presence of symbolic strides: +; +; for (unsigned i = 0; i < 100; i++) +; A[i + 1] = A[Stride * i] + B[i]; + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; ALL-LABEL: @f( +define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, + i64 %stride) { + +; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + +entry: +; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A +; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] +; ONE_STRIDE_SPEC: 
%store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %load_1 = load i32, i32* %arrayidx2, align 4 +; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded +; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded + %add = add i32 %load_1, %load + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + store i32 %add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; With two symbolic strides: +; +; for (unsigned i = 0; i < 100; i++) +; A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i]; + +; ALL-LABEL: @two_strides( +define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, + i64 %stride.1, i64 %stride.2) { + +; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1 +; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1 +; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1 + +entry: +; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A +; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] +; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride.1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr 
inbounds i32, i32* %B, i64 %indvars.iv + %load_1 = load i32, i32* %arrayidx2, align 4 +; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded +; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded + %add = add i32 %load_1, %load + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %mul.2 = mul i64 %indvars.iv.next, %stride.2 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2 + store i32 %add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll new file mode 100644 index 00000000000..ab8029bd35f --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll @@ -0,0 +1,89 @@ +; RUN: opt -loop-load-elim -S < %s | FileCheck %s + +; Don't crash if the store and the load use different types. +; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; C[i] = ((float*)A)[i] * 2; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @f( +define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %Aidx.float = bitcast i32* %Aidx to float* + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + +; CHECK: %a = load float, float* %Aidx.float, align 4 + %a = load float, float* %Aidx.float, align 4 +; CHECK-NEXT: %c = fmul float %a, 2.0 + %c = 
fmul float %a, 2.0 + %c.int = fptosi float %c to i32 + store i32 %c.int, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Don't crash if the store and the load use different types. +; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; A[i+1] = B[i] + 3; +; C[i] = ((float*)A)[i] * 2; +; } + +; CHECK-LABEL: @f2( +define void @f2(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %Aidx.float = bitcast i32* %Aidx to float* + + %b = load i32, i32* %Bidx, align 4 + %a_p2 = add i32 %b, 2 + store i32 %a_p2, i32* %Aidx_next, align 4 + + %a_p3 = add i32 %b, 3 + store i32 %a_p3, i32* %Aidx_next, align 4 + +; CHECK: %a = load float, float* %Aidx.float, align 4 + %a = load float, float* %Aidx.float, align 4 +; CHECK-NEXT: %c = fmul float %a, 2.0 + %c = fmul float %a, 2.0 + %c.int = fptosi float %c to i32 + store i32 %c.int, i32* %Cidx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/unknown-dep.ll b/llvm/test/Transforms/LoopLoadElim/unknown-dep.ll new file mode 100644 index 00000000000..d2df718ca4c --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/unknown-dep.ll @@ -0,0 +1,54 @@ +; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Give up in the presence 
of unknown deps. Here, the different strides result +; in unknown dependence: +; +; for (unsigned i = 0; i < 100; i++) { +; A[i+1] = B[i] + 2; +; A[2*i] = C[i] + 2; +; D[i] = A[i] + 2; +; } + +define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C, + i32* noalias %D, i64 %N) { + +entry: +; for.body.ph: +; CHECK-NOT: %load_initial = + br label %for.body + +for.body: ; preds = %for.body, %entry +; CHECK-NOT: %store_forwarded = + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv + %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %indvars.m2 = mul nuw nsw i64 %indvars.iv, 2 + %A2idx = getelementptr inbounds i32, i32* %A, i64 %indvars.m2 + + %b = load i32, i32* %Bidx, align 4 + %a_p1 = add i32 %b, 2 + store i32 %a_p1, i32* %Aidx_next, align 4 + + %c = load i32, i32* %Cidx, align 4 + %a_m2 = add i32 %c, 2 + store i32 %a_m2, i32* %A2idx, align 4 + + %a = load i32, i32* %Aidx, align 4 +; CHECK-NOT: %d = add i32 %store_forwarded, 2 +; CHECK: %d = add i32 %a, 2 + %d = add i32 %a, 2 + store i32 %d, i32* %Didx, align 4 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} |

