diff options
| author | Tobias Grosser <tobias@grosser.es> | 2014-04-15 22:30:06 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2014-04-15 22:30:06 +0000 |
| commit | 75b76729ab0adb5931843df585503973fa1c65a1 (patch) | |
| tree | 1801749c53ea443eb5505204bb48a8e736488d18 | |
| parent | 43855af9a7d8e29b2727755bb38de985697774df (diff) | |
| download | bcm5719-llvm-75b76729ab0adb5931843df585503973fa1c65a1.tar.gz bcm5719-llvm-75b76729ab0adb5931843df585503973fa1c65a1.zip | |
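Fix for vector codegen in OpenMP subfunctions: before vector code generation, the current value map is copied into every vector map (entries already present are kept)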
Contributed-by: Johannes Doerfert <doerfert@cs.uni-saarland.de>
llvm-svn: 206332
| -rw-r--r-- | polly/lib/CodeGen/CodeGeneration.cpp | 7 | ||||
| -rw-r--r-- | polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll | 48 |
2 files changed, 55 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index e2efeff918c..8b8dc790970 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -481,6 +481,13 @@ void ClastStmtCodeGen::codegen(const clast_user_stmt *u, } } + // Copy the current value map into all vector maps if the key wasn't + // available yet. This is needed in case vector codegen is performed in + // OpenMP subfunctions. + for (auto KV : ValueMap) + for (int i = 0; i < VectorDimensions; ++i) + VectorMap[i].insert(KV); + isl_map *Schedule = extractPartialSchedule(Statement, Domain); VectorBlockGenerator::generate(Builder, *Statement, VectorMap, VLTS, Schedule, P); diff --git a/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll b/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll new file mode 100644 index 00000000000..286050665ea --- /dev/null +++ b/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -basicaa -polly-vectorizer=polly -enable-polly-openmp -polly-opt-isl -polly-codegen < %s + +; void f(int *A, int a, int b) { +; int local = a > b ? 
a : b; +; int i; +; for (i = 0; i < 100; i++) { +; A[i] += local; +; } +; } +; + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i32* %A, i32 %a, i32 %b) { +entry: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %a, %cond.true ], [ %b, %cond.false ] + br label %for.cond + +for.cond: ; preds = %for.inc, %cond.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %cond.end ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %cond + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |

