diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 13 | ||||
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 17 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopDistribute/symbolic-stride.ll | 65 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll | 69 |
6 files changed, 154 insertions, 50 deletions
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index dba0b362fd8..79110a874d6 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -513,8 +513,7 @@ class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - bool SpeculateSymbolicStrides); + DominatorTree *DT, LoopInfo *LI); /// Return true if we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. @@ -585,11 +584,6 @@ public: /// \brief Print the information about the memory accesses in the loop. void print(raw_ostream &OS, unsigned Depth = 0) const; - /// \brief Used to ensure that if the analysis was run with speculating the - /// value of symbolic strides, the client queries it with the same assumption. - /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI. - bool SpeculateSymbolicStrides; - /// \brief Checks existence of store to invariant address inside loop. /// If the loop has any store to invariant address, then it returns true, /// else returns false. @@ -715,11 +709,8 @@ public: /// \brief Query the result of the loop access information for the loop \p L. /// - /// \p SpeculateSymbolicStrides enables symbolic value speculation. The - /// corresponding run-time checks are collected in LAI::PSE. - /// /// If there is no cached result available run the analysis. - const LoopAccessInfo &getInfo(Loop *L, bool SpeculateSymbolicStrides = false); + const LoopAccessInfo &getInfo(Loop *L); void releaseMemory() override { // Invalidate the cache when the pass is freed. 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 69e7ce49cb6..44208408b0f 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -65,6 +65,21 @@ static cl::opt<unsigned> "loop-access analysis (default = 100)"), cl::init(100)); +/// This enables versioning on the strides of symbolically striding memory +/// accesses in code like the following. +/// for (i = 0; i < N; ++i) +/// A[i * Stride1] += B[i * Stride2] ... +/// +/// Will be roughly translated to +/// if (Stride1 == 1 && Stride2 == 1) { +/// for (i = 0; i < N; i+=4) +/// A[i:i+3] += ... +/// } else +/// ... +static cl::opt<bool> EnableMemAccessVersioning( + "enable-mem-access-versioning", cl::init(true), cl::Hidden, + cl::desc("Enable symbolic stride memory access versioning")); + /// \brief Enable store-to-load forwarding conflict detection. This option can /// be disabled for correctness testing. static cl::opt<bool> EnableForwardingConflictDetection( @@ -1540,7 +1555,7 @@ void LoopAccessInfo::analyzeLoop() { NumLoads++; Loads.push_back(Ld); DepChecker.addAccess(Ld); - if (SpeculateSymbolicStrides) + if (EnableMemAccessVersioning) collectStridedAccess(Ld); continue; } @@ -1564,7 +1579,7 @@ void LoopAccessInfo::analyzeLoop() { NumStores++; Stores.push_back(St); DepChecker.addAccess(St); - if (SpeculateSymbolicStrides) + if (EnableMemAccessVersioning) collectStridedAccess(St); } } // Next instr. 
@@ -1904,11 +1919,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - bool SpeculateSymbolicStrides) - : SpeculateSymbolicStrides(SpeculateSymbolicStrides), PSE(*SE, *L), - PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), - AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), + DominatorTree *DT, LoopInfo *LI) + : PSE(*SE, *L), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), + TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) @@ -1955,19 +1968,12 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { PSE.print(OS, Depth); } -const LoopAccessInfo & -LoopAccessAnalysis::getInfo(Loop *L, bool SpeculateSymbolicStrides) { +const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; -#ifndef NDEBUG - assert((!LAI || LAI->SpeculateSymbolicStrides == SpeculateSymbolicStrides) && - "Symbolic strides changed for loop"); -#endif - if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, - SpeculateSymbolicStrides); + LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI); } return *LAI.get(); } diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 3ac01a75892..b88ca7e717d 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -385,7 +385,7 @@ bool LoopVersioningLICM::legalLoopInstructions() { return false; } // Get LoopAccessInfo from current loop. - LAI = &LAA->getInfo(CurLoop, true); + LAI = &LAA->getInfo(CurLoop); // Check LoopAccessInfo for need of runtime check. 
if (LAI->getRuntimePointerChecking()->getChecks().empty()) { DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0c4605ea4b3..954b5938cf5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -130,21 +130,6 @@ static cl::opt<bool> MaximizeBandwidth( cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop.")); -/// This enables versioning on the strides of symbolically striding memory -/// accesses in code like the following. -/// for (i = 0; i < N; ++i) -/// A[i * Stride1] += B[i * Stride2] ... -/// -/// Will be roughly translated to -/// if (Stride1 == 1 && Stride2 == 1) { -/// for (i = 0; i < N; i+=4) -/// A[i:i+3] += ... -/// } else -/// ... -static cl::opt<bool> EnableMemAccessVersioning( - "enable-mem-access-versioning", cl::init(true), cl::Hidden, - cl::desc("Enable symbolic stride memory access versioning")); - static cl::opt<bool> EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, cl::desc("Enable vectorization on interleaved memory accesses in a loop")); @@ -4970,7 +4955,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { } bool LoopVectorizationLegality::canVectorizeMemory() { - LAI = &LAA->getInfo(TheLoop, EnableMemAccessVersioning); + LAI = &LAA->getInfo(TheLoop); auto &OptionalReport = LAI->getReport(); if (OptionalReport) emitAnalysis(VectorizationReport(*OptionalReport)); diff --git a/llvm/test/Transforms/LoopDistribute/symbolic-stride.ll b/llvm/test/Transforms/LoopDistribute/symbolic-stride.ll new file mode 100644 index 00000000000..73d3d19c5dd --- /dev/null +++ b/llvm/test/Transforms/LoopDistribute/symbolic-stride.ll @@ -0,0 +1,65 @@ +; RUN: opt -basicaa -loop-distribute -S < %s | \ +; RUN: FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC + 
+; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \ +; RUN: FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC + +; If we don't speculate stride for 1 we can't distribute along the line +; because we could have a backward dependence: +; +; for (i = 0; i < n; i++) { +; A[i + 1] = A[i] * B[i]; +; ======================= +; C[i] = D[i] * A[stride * i]; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; ALL-LABEL: @f( +define void @f(i32* noalias %a, + i32* noalias %b, + i32* noalias %c, + i32* noalias %d, + i64 %stride) { +entry: + br label %for.body + +; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + +; STRIDE_SPEC: for.body.ldist1: +; NO_STRIDE_SPEC-NOT: for.body.ldist1: + +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %add, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind + %loadA = load i32, i32* %arrayidxA, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind + %loadB = load i32, i32* %arrayidxB, align 4 + + %mulA = mul i32 %loadB, %loadA + + %add = add nuw nsw i64 %ind, 1 + %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add + store i32 %mulA, i32* %arrayidxA_plus_4, align 4 + + %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind + %loadD = load i32, i32* %arrayidxD, align 4 + + %mul = mul i64 %ind, %stride + %arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul + %loadStridedA = load i32, i32* %arrayidxStridedA, align 4 + + %mulC = mul i32 %loadD, %loadStridedA + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind + store i32 %mulC, i32* %arrayidxC, align 4 + + %exitcond = icmp eq i64 %add, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll index 3e33b3fc173..7a2d1b6c7e3 
100644 --- a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll +++ b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll @@ -1,28 +1,44 @@ -; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -loop-load-elim -S < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \ +; RUN: -check-prefix=TWO_STRIDE_SPEC -; Forwarding in the presence of symbolic strides is currently not supported: +; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \ +; RUN: -check-prefix=NO_TWO_STRIDE_SPEC + +; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \ +; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \ +; RUN: -check-prefix=NO_TWO_STRIDE_SPEC + +; Forwarding in the presence of symbolic strides: ; ; for (unsigned i = 0; i < 100; i++) ; A[i + 1] = A[Stride * i] + B[i]; target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: @f( +; ALL-LABEL: @f( define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, i64 %stride) { + +; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + entry: -; CHECK-NOT: %load_initial = load i32, i32* %A +; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A +; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A br label %for.body for.body: ; preds = %for.body, %entry -; CHECK-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] +; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] +; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %mul = mul i64 %indvars.iv, %stride %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul %load = load i32, i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv %load_1 = load i32, 
i32* %arrayidx2, align 4 -; CHECK-NOT: %add = add i32 %load_1, %store_forwarded +; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded +; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded %add = add i32 %load_1, %load %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next @@ -33,3 +49,44 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } + +; With two symbolic strides: +; +; for (unsigned i = 0; i < 100; i++) +; A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i]; + +; ALL-LABEL: @two_strides( +define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, + i64 %stride.1, i64 %stride.2) { + +; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1 +; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1 +; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1 + +entry: +; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A +; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] +; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride.1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %load_1 = load i32, i32* %arrayidx2, align 4 +; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded +; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded + %add = add i32 %load_1, %load + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %mul.2 = mul i64 %indvars.iv.next, %stride.2 + %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2 + store i32 
%add, i32* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} |