[OPENMP 4.1] Improved codegen for 'uval' qualifier of 'linear' clause.

According to standard the 'uval' modifier declares the address of the original list item to have an invariant value for all iterations of the associated loop(s). Patch improves codegen for this qualifier by removing usage of the original reference variable and replacing by referenced l-value. llvm-svn: 245674
author: Alexey Bataev <a.bataev@hotmail.com> 2015-08-21 06:41:23 +0000
committer: Alexey Bataev <a.bataev@hotmail.com> 2015-08-21 06:41:23 +0000
commit: 84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9 (patch)
tree: 87de1a47d363ad9cda798a6ba80d5bebb4342bcd /clang
parent: 187155360e392995c92cc2e266a6cfec953595b7 (diff)
download: bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.tar.gz
bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.zip
2 files changed, 144 insertions, 5 deletions
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d2653b7cb90..5d86cc23f00 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6423,7 +6423,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause(
         *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc());
     // Build var to save initial value.
     VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start");
-    AddInitializerToDecl(Init, DefaultLvalueConversion(DE).get(),
+    Expr *InitExpr;
+    if (LinKind == OMPC_LINEAR_uval)
+      InitExpr = VD->getInit();
+    else
+      InitExpr = DE;
+    AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(),
                          /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
     auto InitRef = buildDeclRefExpr(
         *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc());
@@ -6491,15 +6496,20 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
   bool HasErrors = false;
   auto CurInit = Clause.inits().begin();
   auto CurPrivate = Clause.privates().begin();
+  auto LinKind = Clause.getModifier();
   for (auto &RefExpr : Clause.varlists()) {
     Expr *InitExpr = *CurInit;
 
     // Build privatized reference to the current linear var.
     auto DE = cast<DeclRefExpr>(RefExpr);
-    auto CapturedRef =
-        buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
-                         DE->getType().getUnqualifiedType(), DE->getExprLoc(),
-                         /*RefersToCapture=*/true);
+    Expr *CapturedRef;
+    if (LinKind == OMPC_LINEAR_uval)
+      CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();
+    else
+      CapturedRef =
+          buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
+                           DE->getType().getUnqualifiedType(), DE->getExprLoc(),
+                           /*RefersToCapture=*/true);
 
     // Build update: Var = InitExpr + IV * Step
     ExprResult Update =
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp
index e67ad5d4c7e..bea46479274 100644
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -481,6 +481,135 @@ void widened(float *a, float *b, float *c, float *d) {
 // CHECK: ret void
 }
 
+// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
+void linear(float *a) {
+  // CHECK: [[VAL_ADDR:%.+]] = alloca i64,
+  // CHECK: [[K_ADDR:%.+]] = alloca i64*,
+  long long val = 0;
+  long long &k = val;
+
+  #pragma omp simd linear(k : 3)
+// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+
+  #pragma omp simd linear(val(k) : 3)
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR]],
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
+//
+  #pragma omp simd linear(uval(k) : 3)
+// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
+// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
+// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
+
+// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
+// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
+// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
+  for (int i = 10; i > 1; i--) {
+// CHECK: [[SIMPLE_LOOP_BODY]]
+// Start of body: calculate i from IV:
+// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// FIXME: It is interesting, why the following "mul 1" was not constant folded?
+// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
+// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
+// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+//
+// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
+// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
+// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
+// Update of the privatized version of linear variable!
+// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
+    a[k]++;
+    k = k + 3;
+// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
+// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
+// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
+  }
+// CHECK: [[SIMPLE_LOOP_END]]
+//
+// Update linear vars after loop, as the loop was operating on a private version.
+// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
+// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
+// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
+//
+}
+
 // TERM_DEBUG-LABEL: bar
 int bar() {return 0;};
author	Alexey Bataev <a.bataev@hotmail.com>	2015-08-21 06:41:23 +0000
committer	Alexey Bataev <a.bataev@hotmail.com>	2015-08-21 06:41:23 +0000
commit	84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9 (patch)
tree	87de1a47d363ad9cda798a6ba80d5bebb4342bcd /clang
parent	187155360e392995c92cc2e266a6cfec953595b7 (diff)
download	bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.tar.gz bcm5719-llvm-84cfb1dcdeb694a10dce9927ec5f0e159e46ffa9.zip