summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tobias Grosser <tobias@grosser.es> 2014-04-15 22:30:06 +0000
committer Tobias Grosser <tobias@grosser.es> 2014-04-15 22:30:06 +0000
commit 75b76729ab0adb5931843df585503973fa1c65a1 (patch)
tree 1801749c53ea443eb5505204bb48a8e736488d18
parent 43855af9a7d8e29b2727755bb38de985697774df (diff)
download bcm5719-llvm-75b76729ab0adb5931843df585503973fa1c65a1.tar.gz
bcm5719-llvm-75b76729ab0adb5931843df585503973fa1c65a1.zip
Fix for vector codegen in OpenMP subfunctions
Contributed-by: Johannes Doerfert <doerfert@cs.uni-saarland.de>
llvm-svn: 206332
-rw-r--r-- polly/lib/CodeGen/CodeGeneration.cpp 7
-rw-r--r-- polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll 48
2 files changed, 55 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index e2efeff918c..8b8dc790970 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -481,6 +481,13 @@ void ClastStmtCodeGen::codegen(const clast_user_stmt *u,
}
}
+ // Copy the current value map into all vector maps if the key wasn't
+ // available yet. This is needed in case vector codegen is performed in
+ // OpenMP subfunctions.
+ for (auto KV : ValueMap)
+ for (int i = 0; i < VectorDimensions; ++i)
+ VectorMap[i].insert(KV);
+
isl_map *Schedule = extractPartialSchedule(Statement, Domain);
VectorBlockGenerator::generate(Builder, *Statement, VectorMap, VLTS, Schedule,
P);
diff --git a/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll b/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll
new file mode 100644
index 00000000000..286050665ea
--- /dev/null
+++ b/polly/test/Cloog/CodeGen/OpenMP/vector-codegen-in-openmp-subfunc.ll
@@ -0,0 +1,48 @@
+; RUN: opt %loadPolly -basicaa -polly-vectorizer=polly -enable-polly-openmp -polly-opt-isl -polly-codegen < %s
+
+; void f(int *A, int a, int b) {
+; int local = a > b ? a : b;
+; int i;
+; for (i = 0; i < 100; i++) {
+; A[i] += local;
+; }
+; }
+;
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i32* %A, i32 %a, i32 %b) {
+entry:
+ %cmp = icmp sgt i32 %a, %b
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %a, %cond.true ], [ %b, %cond.false ]
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %cond.end
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %cond.end ]
+ %exitcond = icmp ne i64 %indvars.iv, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %tmp = load i32* %arrayidx, align 4
+ %add = add nsw i32 %tmp, %cond
+ store i32 %add, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
OpenPOWER on IntegriCloud