summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-30 09:23:21 +0000
committerJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-30 09:23:21 +0000
commit1bf767fb8e6066bd87560378c23185a412fb2538 (patch)
treed859ed828601dd79a3abfa13ce65f7446ab24722
parent067235f227f541816af8aa777bdf1b214c22c77b (diff)
downloadbcm5719-llvm-1bf767fb8e6066bd87560378c23185a412fb2538.tar.gz
bcm5719-llvm-1bf767fb8e6066bd87560378c23185a412fb2538.zip
[libomptarget-nvptx] Align data sharing stack
NVPTX requires addresses of pointer locations to be 8-byte aligned or there will be an exception during runtime. This could happen without this patch as shown in the added test: getId() requires 4 byte of stack and putValueInParallel() uses 16 bytes to store the addresses of the captured variables. Differential Revision: https://reviews.llvm.org/D52655 llvm-svn: 343402
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu7
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c55
2 files changed, 62 insertions, 0 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 2125c36e1d8..c7b9bdf9a9b 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -384,6 +384,13 @@ EXTERN void* __kmpc_data_sharing_push_stack(size_t DataSize,
return omptarget_nvptx_SimpleThreadPrivateContext::Allocate(DataSize);
}
+ // Add worst-case padding to DataSize so that future stack allocations are
+ // correctly aligned.
+ const size_t Alignment = 8;
+ if (DataSize % Alignment != 0) {
+ DataSize += (Alignment - DataSize % Alignment);
+ }
+
// Frame pointer must be visible to all workers in the same warp.
unsigned WID = getWarpId();
void *&FrameP = DataSharingState.FramePtr[WID];
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c b/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c
new file mode 100644
index 00000000000..dd17ae7c6a7
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c
@@ -0,0 +1,55 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp declare target
+static void putValueInParallel(int *ptr, int value) {
+ #pragma omp parallel
+ {
+ *ptr = value;
+ }
+}
+
+static int getId() {
+ int id;
+ putValueInParallel(&id, omp_get_thread_num());
+ return id;
+}
+#pragma omp end declare target
+
+const int MaxThreads = 1024;
+const int Threads = 64;
+
+int main(int argc, char *argv[]) {
+ int master;
+ int check[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check[i] = 0;
+ }
+
+ #pragma omp target map(master, check[:])
+ {
+ master = getId();
+
+ #pragma omp parallel num_threads(Threads)
+ {
+ check[omp_get_thread_num()] = getId();
+ }
+ }
+
+ // CHECK: master = 0.
+ printf("master = %d.\n", master);
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ if (i < Threads) {
+ if (check[i] != i) {
+ printf("invalid: check[%d] should be %d, is %d\n", i, i, check[i]);
+ }
+ } else if (check[i] != 0) {
+ printf("invalid: check[%d] should be 0, is %d\n", i, check[i]);
+ }
+ }
+
+ return 0;
+}
OpenPOWER on IntegriCloud