summaryrefslogtreecommitdiffstats
path: root/openmp
diff options
context:
space:
mode:
authorJonathan Peyton <jonathan.l.peyton@intel.com>2016-04-18 21:38:29 +0000
committerJonathan Peyton <jonathan.l.peyton@intel.com>2016-04-18 21:38:29 +0000
commit5235a1b603db838df51c91782ca98155d892ff9f (patch)
treebecf0fe49a098ddc803ba2b0b06cc08d09045849 /openmp
parentd8ce87f850ad4b04770b3af7b18ae2dbefd5f012 (diff)
downloadbcm5719-llvm-5235a1b603db838df51c91782ca98155d892ff9f.tar.gz
bcm5719-llvm-5235a1b603db838df51c91782ca98155d892ff9f.zip
Fix trip count calculation for parallel loops in runtime
The trip count calculation was incorrect for loops with large bounds. For example, for(int i=-2,000,000,000; i < 2,000,000,000; i+=50000000), the trip count calculation had overflow (trying to calculate 2,000,000,000 + 2,000,000,000 with signed integers) and wasn't giving the right value. This patch fixes this error in the runtime by using unsigned integers instead. There is still a bug in the clang compiler component because it warns that there is overflow in the test case file when there isn't. This error isn't there for the Intel Compiler. So for now, the test case is designated as XFAIL. Differential Revision: http://reviews.llvm.org/D19078 llvm-svn: 266677
Diffstat (limited to 'openmp')
-rw-r--r--openmp/runtime/src/kmp_dispatch.cpp44
-rw-r--r--openmp/runtime/src/kmp_sched.cpp24
-rw-r--r--openmp/runtime/test/worksharing/for/omp_for_bigbounds.c67
3 files changed, 109 insertions, 26 deletions
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 23d736a1b5a..9c3974a2f4b 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -757,24 +757,29 @@ __kmp_dispatch_init(
);
}
}
-
- tc = ( ub - lb + st );
- if ( st != 1 ) {
- if ( st < 0 ) {
- if ( lb < ub ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc = (ST)tc / st; // convert to signed division
- }
- } else { // st > 0
- if ( ub < lb ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc /= st;
- }
+ // compute trip count
+ if ( st == 1 ) { // most common case
+ if ( ub >= lb ) {
+ tc = ub - lb + 1;
+ } else { // ub < lb
+ tc = 0; // zero-trip
+ }
+ } else if ( st < 0 ) {
+ if ( lb >= ub ) {
+ // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
+ // where the division needs to be unsigned regardless of the result type
+ tc = (UT)(lb - ub) / (-st) + 1;
+ } else { // lb < ub
+ tc = 0; // zero-trip
+ }
+ } else { // st > 0
+ if ( ub >= lb ) {
+ // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
+ // where the division needs to be unsigned regardless of the result type
+ tc = (UT)(ub - lb) / st + 1;
+ } else { // ub < lb
+ tc = 0; // zero-trip
}
- } else if ( ub < lb ) { // st == 1
- tc = 0; // zero-trip
}
// Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
@@ -2255,8 +2260,11 @@ __kmp_dist_get_bounds(
trip_count = *pupper - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupper + 1;
+ } else if ( incr > 0 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
- trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
+ trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
}
if( trip_count <= nteams ) {
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 5a421d700ad..a09aa868173 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -244,12 +244,11 @@ __kmp_for_static_init(
trip_count = *pupper - *plower + 1;
} else if (incr == -1) {
trip_count = *plower - *pupper + 1;
+ } else if ( incr > 0 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
- if ( incr > 1 ) { // the check is needed for unsigned division when incr < 0
- trip_count = (*pupper - *plower) / incr + 1;
- } else {
- trip_count = (*plower - *pupper) / ( -incr ) + 1;
- }
+ trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
}
if ( __kmp_env_consistency_check ) {
@@ -447,8 +446,11 @@ __kmp_dist_for_static_init(
trip_count = *pupper - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupper + 1;
+ } else if ( incr > 0 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
- trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
+ trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
}
*pstride = *pupper - *plower; // just in case (can be unused)
@@ -514,8 +516,11 @@ __kmp_dist_for_static_init(
trip_count = *pupperDist - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupperDist + 1;
+ } else if ( incr > 1 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(*pupperDist - *plower) / incr + 1;
} else {
- trip_count = (ST)(*pupperDist - *plower) / incr + 1;
+ trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
}
KMP_DEBUG_ASSERT( trip_count );
switch( schedule ) {
@@ -684,8 +689,11 @@ __kmp_team_static_init(
trip_count = upper - lower + 1;
} else if(incr == -1) {
trip_count = lower - upper + 1;
+ } else if ( incr > 0 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(upper - lower) / incr + 1;
} else {
- trip_count = (ST)(upper - lower) / incr + 1; // cast to signed to cover incr<0 case
+ trip_count = (UT)(lower - upper) / (-incr) + 1;
}
if( chunk < 1 )
chunk = 1;
diff --git a/openmp/runtime/test/worksharing/for/omp_for_bigbounds.c b/openmp/runtime/test/worksharing/for/omp_for_bigbounds.c
new file mode 100644
index 00000000000..82f579b83a9
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/omp_for_bigbounds.c
@@ -0,0 +1,67 @@
+// RUN: %libomp-compile -DMY_SCHEDULE=static && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=dynamic && %libomp-run
+// RUN: %libomp-compile -DMY_SCHEDULE=guided && %libomp-run
+// XFAIL: *
+/*
+ * Test that large bounds are handled properly and calculations of
+ * loop iterations don't accidently overflow
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+#define INCR 50000000
+#define MY_MAX 2000000000
+#define MY_MIN -2000000000
+#ifndef MY_SCHEDULE
+# define MY_SCHEDULE static
+#endif
+
+int a, b, a_known_value, b_known_value;
+
+int test_omp_for_bigbounds()
+{
+ a = 0;
+ b = 0;
+ #pragma omp parallel
+ {
+ int i;
+ #pragma omp for schedule(MY_SCHEDULE)
+ for (i = INT_MIN; i < MY_MAX; i+=INCR) {
+ #pragma omp atomic
+ a++;
+ }
+ #pragma omp for schedule(MY_SCHEDULE)
+ for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
+ #pragma omp atomic
+ b++;
+ }
+ }
+ printf("a = %d (should be %d), b = %d (should be %d)\n", a, a_known_value, b, b_known_value);
+ return (a == a_known_value && b == b_known_value);
+}
+
+int main()
+{
+ int i;
+ int num_failed=0;
+
+ a_known_value = 0;
+ for (i = INT_MIN; i < MY_MAX; i+=INCR) {
+ a_known_value++;
+ }
+
+ b_known_value = 0;
+ for (i = INT_MAX; i >= MY_MIN; i-=INCR) {
+ b_known_value++;
+ }
+
+ for(i = 0; i < REPETITIONS; i++) {
+ if(!test_omp_for_bigbounds()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
OpenPOWER on IntegriCloud