summaryrefslogtreecommitdiffstats
path: root/openmp/runtime/src/kmp_dispatch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'openmp/runtime/src/kmp_dispatch.cpp')
-rw-r--r--openmp/runtime/src/kmp_dispatch.cpp44
1 files changed, 26 insertions, 18 deletions
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 23d736a1b5a..9c3974a2f4b 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -757,24 +757,29 @@ __kmp_dispatch_init(
);
}
}
-
- tc = ( ub - lb + st );
- if ( st != 1 ) {
- if ( st < 0 ) {
- if ( lb < ub ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc = (ST)tc / st; // convert to signed division
- }
- } else { // st > 0
- if ( ub < lb ) {
- tc = 0; // zero-trip
- } else { // lb >= ub
- tc /= st;
- }
+ // compute trip count
+ if ( st == 1 ) { // most common case
+ if ( ub >= lb ) {
+ tc = ub - lb + 1;
+ } else { // ub < lb
+ tc = 0; // zero-trip
+ }
+ } else if ( st < 0 ) {
+ if ( lb >= ub ) {
+ // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
+ // where the division needs to be unsigned regardless of the result type
+ tc = (UT)(lb - ub) / (-st) + 1;
+ } else { // lb < ub
+ tc = 0; // zero-trip
+ }
+ } else { // st > 0
+ if ( ub >= lb ) {
+ // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
+ // where the division needs to be unsigned regardless of the result type
+ tc = (UT)(ub - lb) / st + 1;
+ } else { // ub < lb
+ tc = 0; // zero-trip
}
- } else if ( ub < lb ) { // st == 1
- tc = 0; // zero-trip
}
// Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing
@@ -2255,8 +2260,11 @@ __kmp_dist_get_bounds(
trip_count = *pupper - *plower + 1;
} else if(incr == -1) {
trip_count = *plower - *pupper + 1;
+ } else if ( incr > 0 ) {
+ // upper-lower can exceed the limit of signed type
+ trip_count = (UT)(*pupper - *plower) / incr + 1;
} else {
- trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
+ trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
}
if( trip_count <= nteams ) {
OpenPOWER on IntegriCloud