author | Tobias Grosser <tobias@grosser.es> | 2015-09-17 17:28:15 +0000
---|---|---
committer | Tobias Grosser <tobias@grosser.es> | 2015-09-17 17:28:15 +0000
commit | 5fd8c0961e5799ff312bfcf5483f6af150931a7a (patch) |
tree | 5317440dcdf6319043c5d75ceceb314d8df0bcbe |
parent | b78585b3c24e19f92afa5ab3050bc8c305e0c763 (diff) |
download | bcm5719-llvm-5fd8c0961e5799ff312bfcf5483f6af150931a7a.tar.gz bcm5719-llvm-5fd8c0961e5799ff312bfcf5483f6af150931a7a.zip |
Model accesses to fixed-size multi-dimensional arrays, if possible, as multi-dimensional
If the GEP instructions provide enough information, model scalar accesses as
multi-dimensional (and generate the run-time checks needed to ensure
correctness). This will allow us to simplify the dependence computation in
a subsequent commit.
llvm-svn: 247906
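
For illustration only (this sketch is not part of the commit): with the change, an access through a GEP into a fixed-size array is modeled with one subscript per array dimension instead of a single linearized expression, as the updated CHECK lines in loop_affine_bound_0.ll (MemRef_a[i0 + 128i1] becoming MemRef_a[i1, i0]) show. A hypothetical kernel of that shape, assuming a declared inner dimension of 128 elements:

// Hypothetical C++ kernel, not taken from the test suite. The [128 x i64]
// array type in the resulting GEP exposes both subscripts to Polly.
void f(long A[][128], long N, long M) {
  for (long j = 0; j < M; ++j)    // outer dimension, extent not known statically
    for (long i = 0; i < N; ++i)  // inner dimension, declared size 128
      // Now modeled as MemRef_A[j, i] rather than the linearized
      // MemRef_A[i + 128j]; Polly additionally records the run-time
      // assumption that i stays below 128 (cf. assume_gep_bounds.ll).
      A[j][i] = i + j;
}

Keeping the subscripts separate lets the dependence analysis reason about each dimension independently, which is presumably the simplification the message above refers to.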
-rw-r--r-- | polly/include/polly/ScopInfo.h | 15
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 143
-rw-r--r-- | polly/test/DependenceInfo/sequential_loops.ll | 1
-rw-r--r-- | polly/test/Isl/Ast/alias_simple_1.ll | 10
-rw-r--r-- | polly/test/Isl/Ast/alias_simple_2.ll | 10
-rw-r--r-- | polly/test/Isl/Ast/alias_simple_3.ll | 10
-rw-r--r-- | polly/test/Isl/CodeGen/scalar_codegen_crash.ll (renamed from polly/test/Isl/Ast/assumed_context_empty_domain_restriction.ll) | 26
-rw-r--r-- | polly/test/ScopInfo/assume_gep_bounds.ll | 2
-rw-r--r-- | polly/test/ScopInfo/loop_affine_bound_0.ll | 2
-rw-r--r-- | polly/test/ScopInfo/loop_affine_bound_1.ll | 2
-rw-r--r-- | polly/test/ScopInfo/loop_affine_bound_2.ll | 2
-rw-r--r-- | polly/test/ScopInfo/stride_detection.ll | 17
12 files changed, 125 insertions, 115 deletions
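
The central new piece in the ScopInfo.cpp hunk below is the file-local getIndexExpressionsFromGEP helper: it walks the GEP index operands, keeps one subscript SCEV per array dimension, and records the declared sizes of the inner dimensions (the outermost dimension's extent is not needed to delinearize the access, so the size list can be one element shorter than the subscript list). The following standalone sketch is simplified and hypothetical; it operates on plain integers instead of SCEVs and LLVM types and is only meant to illustrate that contract:

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-in for the result of getIndexExpressionsFromGEP.
struct IndexExpressions {
  std::vector<int64_t> Subscripts; // stands in for the per-dimension SCEVs
  std::vector<int64_t> Sizes;      // recorded sizes of the inner dimensions
};

// GEPIndices are the GEP index operands (operand 1 onwards); ArrayDims are the
// declared sizes of the nested array dimensions of the pointee type.
static IndexExpressions
indexExpressionsFor(const std::vector<int64_t> &GEPIndices,
                    const std::vector<int64_t> &ArrayDims) {
  IndexExpressions Result;
  bool DroppedFirstDim = false;
  for (size_t Op = 0; Op < GEPIndices.size(); ++Op) {
    // A leading constant-zero index only steps through the pointer; it is
    // dropped instead of being kept as a subscript.
    if (Op == 0 && GEPIndices[0] == 0) {
      DroppedFirstDim = true;
      continue;
    }
    Result.Subscripts.push_back(GEPIndices[Op]);
    // Index operand Op addresses the array dimension of size ArrayDims[Op - 1].
    // The size of the outermost modeled dimension is never recorded.
    if (Op >= 1 && Op - 1 < ArrayDims.size() && !(DroppedFirstDim && Op == 1))
      Result.Sizes.push_back(ArrayDims[Op - 1]);
  }
  return Result;
}

int main() {
  // float A[20][30]; the access A[4][7] lowers to GEP indices {0, 4, 7}.
  IndexExpressions R = indexExpressionsFor({0, 4, 7}, {20, 30});
  std::cout << "subscripts:";
  for (int64_t S : R.Subscripts)
    std::cout << ' ' << S; // prints: 4 7
  std::cout << "\nsizes:";
  for (int64_t S : R.Sizes)
    std::cout << ' ' << S; // prints: 30
  std::cout << '\n';
  return 0;
}

With float A[20][30] and an access A[i][j], the real helper (like this sketch) drops the leading zero and returns subscripts {i, j} with sizes {30}; buildIRAccess then converts the sizes to SCEVs, appends the access size, and builds a multi-dimensional IRAccess from the subscripts, as shown in the last ScopInfo.cpp hunk.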
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 57ba98f3c6e..a1a141a07da 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -679,21 +679,6 @@ private: llvm::SmallVectorImpl<MemoryAccess *> &Loads); //@} - /// @brief Derive the individual index expressions from a GEP instruction - /// - /// This function optimistically assumes the GEP references into a fixed size - /// array. If this is actually true, this function returns a list of array - /// subscript expressions as SCEV as well as a list of integers describing - /// the size of the individual array dimensions. Both lists have either equal - /// length of the size list is one element shorter in case there is no known - /// size available for the outermost array dimension. - /// - /// @param GEP The GetElementPtr instruction to analyze. - /// - /// @return A tuple with the subscript expressions and the dimension sizes. - std::tuple<std::vector<const SCEV *>, std::vector<int>> - getIndexExpressionsFromGEP(GetElementPtrInst *GEP); - /// @brief Derive assumptions about parameter values from GetElementPtrInst /// /// In case a GEP instruction references into a fixed size array e.g., an diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 0aae742f1ea..b1fa5804d74 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -289,8 +289,70 @@ static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, return MemoryAccess::RT_NONE; } } + //===----------------------------------------------------------------------===// +/// @brief Derive the individual index expressions from a GEP instruction +/// +/// This function optimistically assumes the GEP references into a fixed size +/// array. If this is actually true, this function returns a list of array +/// subscript expressions as SCEV as well as a list of integers describing +/// the size of the individual array dimensions. Both lists have either equal +/// length of the size list is one element shorter in case there is no known +/// size available for the outermost array dimension. +/// +/// @param GEP The GetElementPtr instruction to analyze. +/// +/// @return A tuple with the subscript expressions and the dimension sizes. 
+static std::tuple<std::vector<const SCEV *>, std::vector<int>> +getIndexExpressionsFromGEP(GetElementPtrInst *GEP, ScalarEvolution &SE) { + std::vector<const SCEV *> Subscripts; + std::vector<int> Sizes; + + Type *Ty = GEP->getPointerOperandType(); + + bool DroppedFirstDim = false; + + for (long i = 1; i < GEP->getNumOperands(); i++) { + + const SCEV *Expr = SE.getSCEV(GEP->getOperand(i)); + + if (i == 1) { + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + Ty = PtrTy->getElementType(); + } else if (auto ArrayTy = dyn_cast<ArrayType>(Ty)) { + Ty = ArrayTy->getElementType(); + } else { + Subscripts.clear(); + Sizes.clear(); + break; + } + if (auto Const = dyn_cast<SCEVConstant>(Expr)) + if (Const->getValue()->isZero()) { + DroppedFirstDim = true; + continue; + } + Subscripts.push_back(Expr); + continue; + } + + auto ArrayTy = dyn_cast<ArrayType>(Ty); + if (!ArrayTy) { + Subscripts.clear(); + Sizes.clear(); + break; + } + + Subscripts.push_back(Expr); + if (!(DroppedFirstDim && i == 2)) + Sizes.push_back(ArrayTy->getNumElements()); + + Ty = ArrayTy->getElementType(); + } + + return std::make_tuple(Subscripts, Sizes); +} + MemoryAccess::~MemoryAccess() { isl_id_free(Id); isl_map_free(AccessRelation); @@ -561,7 +623,8 @@ MemoryAccess::MemoryAccess(const IRAccess &Access, Instruction *AccInst, AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap); } - AccessRelation = foldAccess(Access, AccessRelation, Statement); + if (Access.Sizes.size() > 1 && !isa<SCEVConstant>(Access.Sizes[0])) + AccessRelation = foldAccess(Access, AccessRelation, Statement); Space = Statement->getDomainSpace(); AccessRelation = isl_map_set_tuple_id( @@ -921,51 +984,6 @@ void ScopStmt::buildDomain() { Domain = isl_set_set_tuple_id(Domain, Id); } -std::tuple<std::vector<const SCEV *>, std::vector<int>> -ScopStmt::getIndexExpressionsFromGEP(GetElementPtrInst *GEP) { - ScalarEvolution &SE = *Parent.getSE(); - std::vector<const SCEV *> Subscripts; - std::vector<int> Sizes; - - Type *Ty = GEP->getPointerOperandType(); - - for (long i = 1; i < GEP->getNumOperands(); i++) { - - const SCEV *Expr = SE.getSCEV(GEP->getOperand(i)); - - if (i == 1) { - if (auto PtrTy = dyn_cast<PointerType>(Ty)) { - Ty = PtrTy->getElementType(); - } else if (auto ArrayTy = dyn_cast<ArrayType>(Ty)) { - Ty = ArrayTy->getElementType(); - } else { - Subscripts.clear(); - Sizes.clear(); - break; - } - if (auto Const = dyn_cast<SCEVConstant>(Expr)) - if (Const->getValue()->isZero()) - continue; - Subscripts.push_back(Expr); - continue; - } - - auto ArrayTy = dyn_cast<ArrayType>(Ty); - if (!ArrayTy) { - Subscripts.clear(); - Sizes.clear(); - break; - } - - Subscripts.push_back(Expr); - Sizes.push_back(ArrayTy->getNumElements()); - - Ty = ArrayTy->getElementType(); - } - - return std::make_tuple(Subscripts, Sizes); -} - void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) { int Dimension = 0; isl_ctx *Ctx = Parent.getIslCtx(); @@ -976,7 +994,7 @@ void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *GEP) { std::vector<const SCEV *> Subscripts; std::vector<int> Sizes; - std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP); + std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE); if (auto *PtrTy = dyn_cast<PointerType>(Ty)) { Dimension = 1; @@ -2898,13 +2916,42 @@ ScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R, Val = Store->getValueOperand(); } - const SCEV *AccessFunction = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + auto Address = getPointerOperand(*Inst); + + const SCEV 
*AccessFunction = SE->getSCEVAtScope(Address, L); const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction)); assert(BasePointer && "Could not find base pointer"); AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); + if (auto *GEP = dyn_cast<GetElementPtrInst>(Address)) { + std::vector<const SCEV *> Subscripts; + std::vector<int> Sizes; + std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, *SE); + auto BasePtr = GEP->getOperand(0); + + std::vector<const SCEV *> SizesSCEV; + + bool AllAffineSubcripts = true; + for (auto Subscript : Subscripts) + if (!isAffineExpr(R, Subscript, *SE)) { + AllAffineSubcripts = false; + break; + } + + if (AllAffineSubcripts && Sizes.size() > 0) { + for (auto V : Sizes) + SizesSCEV.push_back(SE->getSCEV(ConstantInt::get( + IntegerType::getInt64Ty(BasePtr->getContext()), V))); + SizesSCEV.push_back(SE->getSCEV(ConstantInt::get( + IntegerType::getInt64Ty(BasePtr->getContext()), Size))); + + return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, true, + Subscripts, SizesSCEV, Val); + } + } + auto AccItr = InsnToMemAcc.find(Inst); if (PollyDelinearize && AccItr != InsnToMemAcc.end()) return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, true, diff --git a/polly/test/DependenceInfo/sequential_loops.ll b/polly/test/DependenceInfo/sequential_loops.ll index 42d54cf9222..aadd67412c4 100644 --- a/polly/test/DependenceInfo/sequential_loops.ll +++ b/polly/test/DependenceInfo/sequential_loops.ll @@ -272,7 +272,6 @@ exit.2: ; VALUE: RAW dependences: ; VALUE: [p] -> { ; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] : -; VALUE-DAG: p <= 190 ; VALUE-DAG: i0 >= p ; VALUE-DAG: i0 <= 9 + p ; VALUE-DAG: i0 <= 99 diff --git a/polly/test/Isl/Ast/alias_simple_1.ll b/polly/test/Isl/Ast/alias_simple_1.ll index 9fc41279c27..273c597d6e2 100644 --- a/polly/test/Isl/Ast/alias_simple_1.ll +++ b/polly/test/Isl/Ast/alias_simple_1.ll @@ -12,11 +12,11 @@ ; A[i] = B[i]; ; } ; -; NOAA: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; BASI: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; TBAA: if (N <= 1024) -; SCEV: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; GLOB: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; NOAA: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; BASI: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; TBAA: if (1) +; SCEV: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; GLOB: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/Ast/alias_simple_2.ll b/polly/test/Isl/Ast/alias_simple_2.ll index b32af7815b1..6974824c0f0 100644 --- a/polly/test/Isl/Ast/alias_simple_2.ll +++ b/polly/test/Isl/Ast/alias_simple_2.ll @@ -12,11 +12,11 @@ ; A[i] = B[i]; ; } ; -; NOAA: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; BASI: if (N <= 1024) -; TBAA: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; SCEV: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; GLOB: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; NOAA: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; BASI: if (1) +; TBAA: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || 
&MemRef_A[N] <= &MemRef_B[0])) +; SCEV: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; GLOB: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/Ast/alias_simple_3.ll b/polly/test/Isl/Ast/alias_simple_3.ll index d93761bebfe..26cb4d5b0ba 100644 --- a/polly/test/Isl/Ast/alias_simple_3.ll +++ b/polly/test/Isl/Ast/alias_simple_3.ll @@ -12,11 +12,11 @@ ; A[i] = B[i]; ; } ; -; NOAA: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; BASI: if (N <= 1024) -; TBAA: if (N <= 1024) -; SCEV: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) -; GLOB: if (N <= 1024 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; NOAA: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; BASI: if (1) +; TBAA: if (1) +; SCEV: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) +; GLOB: if (1 && (&MemRef_B[N] <= &MemRef_A[0] || &MemRef_A[N] <= &MemRef_B[0])) ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/Ast/assumed_context_empty_domain_restriction.ll b/polly/test/Isl/CodeGen/scalar_codegen_crash.ll index 92f9499c1b8..c2e9b0465c2 100644 --- a/polly/test/Isl/Ast/assumed_context_empty_domain_restriction.ll +++ b/polly/test/Isl/CodeGen/scalar_codegen_crash.ll @@ -1,22 +1,10 @@ -; RUN: opt %loadPolly -analyze -polly-detect -polly-detect-unprofitable < %s | \ -; RUN: FileCheck %s --check-prefix=DETECT -; RUN: opt %loadPolly -analyze -polly-scops -polly-detect-unprofitable < %s | \ -; RUN: FileCheck %s --check-prefix=INFO -; RUN: opt %loadPolly -analyze -polly-ast -polly-detect-unprofitable < %s | \ -; RUN: FileCheck %s -; -; This test used to crash the scalar code generation, now we will bail out after -; ScopInfo and destory the ScoP as the runtime context is empty. -; -; DETECT: Valid Region for Scop -; -; INFO-NOT: Context: -; INFO-NOT: Assumed Context: -; -; CHECK-NOT: isl ast -; CHECK-NOT: if ( -; CHECK-NOT: original code -; +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit \ +; RUN: -polly-codegen -S < %s | FileCheck %s + +; This test cases used to crash the scalar code generation. Check that we +; can generate code for it. 
+ +; CHECK: polly.start @endposition = external global i32, align 4 @Bit = external global [0 x i32], align 4 @Init = external global [0 x i32], align 4 diff --git a/polly/test/ScopInfo/assume_gep_bounds.ll b/polly/test/ScopInfo/assume_gep_bounds.ll index b850179415f..0571b078cbe 100644 --- a/polly/test/ScopInfo/assume_gep_bounds.ll +++ b/polly/test/ScopInfo/assume_gep_bounds.ll @@ -20,8 +20,6 @@ ; CHECK-DAG: p <= 30 ; CHECK-DAG: and ; CHECK-DAG: m <= 20 -; CHECK-DAG: and -; CHECK-DAG: p <= 2305843009213694582 - 600n - 30m ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/loop_affine_bound_0.ll b/polly/test/ScopInfo/loop_affine_bound_0.ll index 52269dc82b4..073d226cbd1 100644 --- a/polly/test/ScopInfo/loop_affine_bound_0.ll +++ b/polly/test/ScopInfo/loop_affine_bound_0.ll @@ -67,5 +67,5 @@ return: ; preds = %bb.nph8, %bb3, %ent ; CHECK: Schedule := ; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> [i0, i1] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] -; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[i0 + 128i1] }; +; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[i1, i0] }; ; CHECK: } diff --git a/polly/test/ScopInfo/loop_affine_bound_1.ll b/polly/test/ScopInfo/loop_affine_bound_1.ll index 0201a4d4a07..8a8319f66f8 100644 --- a/polly/test/ScopInfo/loop_affine_bound_1.ll +++ b/polly/test/ScopInfo/loop_affine_bound_1.ll @@ -70,5 +70,5 @@ return: ; preds = %bb3, %entry ; CHECK-DAG: i1 <= 1 + 5N ; CHECK: } ; CHECK: MustWriteAccess := [Reduction Type: NONE] -; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[129i0 + 128i1] }; +; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[i1, 129i0] }; ; CHECK: } diff --git a/polly/test/ScopInfo/loop_affine_bound_2.ll b/polly/test/ScopInfo/loop_affine_bound_2.ll index 3f6835ec950..bc92d6df7ee 100644 --- a/polly/test/ScopInfo/loop_affine_bound_2.ll +++ b/polly/test/ScopInfo/loop_affine_bound_2.ll @@ -77,5 +77,5 @@ return: ; preds = %bb3, %entry ; CHECK: Schedule := ; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> [i0, i1] ; CHECK: MustWriteAccess := [Reduction Type: NONE] -; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[-1152 + 768M + 897i0 + 128i1] }; +; CHECK: [N, M] -> { Stmt_bb1[i0, i1] -> MemRef_a[-9 + 6M + i1, 897i0] ; CHECK: } diff --git a/polly/test/ScopInfo/stride_detection.ll b/polly/test/ScopInfo/stride_detection.ll index 2c43b027d82..8fade29f0d8 100644 --- a/polly/test/ScopInfo/stride_detection.ll +++ b/polly/test/ScopInfo/stride_detection.ll @@ -10,20 +10,13 @@ ; Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3); ; CHECK: polly.stmt.for.body.3: ; preds = %polly.loop_header18 -; CHECK: %scevgep = getelementptr [1024 x double], [1024 x double]* %A, i64 0, i64 %21 -; CHECK: %_p_vec_p = bitcast double* %scevgep to <1 x double>* ; CHECK: %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !1, !noalias !3, !llvm.mem.parallel_loop_access !0 -; CHECK: %_p_splat = shufflevector <1 x double> %_p_splat_one, <1 x double> %_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %scevgep26 = getelementptr [1024 x double], [1024 x double]* %C, i64 0, i64 %19 -; CHECK: %vector_ptr = bitcast double* %scevgep26 to <4 x double>* ; CHECK: %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0 -; CHECK: %addp_vec = fadd <4 x double> %_p_splat, %_p_vec_full -; CHECK: %40 = extractelement <4 x double> %addp_vec, i32 0 -; CHECK: %41 = extractelement <4 x double> %addp_vec, i32 1 -; CHECK: %42 = extractelement <4 x 
double> %addp_vec, i32 2 -; CHECK: %43 = extractelement <4 x double> %addp_vec, i32 3 -; CHECK: %vector_ptr27 = bitcast double* %scevgep26 to <4 x double>* -; CHECK: store <4 x double> %addp_vec, <4 x double>* %vector_ptr27, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0 +; CHECK: extractelement <4 x double> %addp_vec, i32 0 +; CHECK: extractelement <4 x double> %addp_vec, i32 1 +; CHECK: extractelement <4 x double> %addp_vec, i32 2 +; CHECK: extractelement <4 x double> %addp_vec, i32 3 +; CHECK: store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0 define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 { entry: |