summaryrefslogtreecommitdiffstats
path: root/clang/test/OpenMP/for_simd_codegen.cpp
diff options
context:
space:
mode:
authorGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>2019-01-09 20:45:26 +0000
committerGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>2019-01-09 20:45:26 +0000
commit67796064c76e921de8a5c07a25cecafc215d71e2 (patch)
treef2a18b2311c2b01ab6f05f1607978a06e7845b4d /clang/test/OpenMP/for_simd_codegen.cpp
parenta3afcf24453123abe7c1ba3d984b19cd64b206a7 (diff)
downloadbcm5719-llvm-67796064c76e921de8a5c07a25cecafc215d71e2.tar.gz
bcm5719-llvm-67796064c76e921de8a5c07a25cecafc215d71e2.zip
[OpenMP] Avoid remainder operations for loop index values on a collapsed loop nest.
Summary: Change the strategy for computing loop index variables after collapsing a loop nest via the collapse clause by replacing the expensive remainder operation with multiplications and additions. Reviewers: ABataev, caomhin Reviewed By: ABataev Subscribers: guansong, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D56413 llvm-svn: 350759
Diffstat (limited to 'clang/test/OpenMP/for_simd_codegen.cpp')
-rw-r--r--clang/test/OpenMP/for_simd_codegen.cpp73
1 files changed, 62 insertions, 11 deletions
diff --git a/clang/test/OpenMP/for_simd_codegen.cpp b/clang/test/OpenMP/for_simd_codegen.cpp
index a4c148e7d2c..c36f52704b7 100644
--- a/clang/test/OpenMP/for_simd_codegen.cpp
+++ b/clang/test/OpenMP/for_simd_codegen.cpp
@@ -408,7 +408,10 @@ int templ1(T a, T *z) {
// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
// CHECK-NEXT: store i32 [[I_2]], i32*
// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]
-// CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
+// CHECK-NEXT: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]
+// CHECK-NEXT: [[DIV_2:%.+]] = sdiv i64 [[IV2_1]], 4
+// CHECK-NEXT: [[MUL_2:%.+]] = mul nsw i64 [[DIV_2]], 4
+// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_2]]
// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
// CHECK-NEXT: store i64 [[J_2_ADD0]], i64*
@@ -556,22 +559,70 @@ void collapsed(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
+
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]
-// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
-// CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
+// CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60
+// CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60
+// CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]]
+// CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20
// CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
// CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
// CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
+
// CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]
-// CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
-// CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
-// CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
-// CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
-// CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
+// CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60
+// CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60
+// CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]]
+
// CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]
-// CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
-// CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
-// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
+// CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60
+// CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60
+// CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]]
+// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20
+// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20
+// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[ADD_3]], [[MUL_4]]
+// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 5
+// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1
+// CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]]
+// CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]]
+
+// CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60
+// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60
+// CHECK-NEXT: [[ADD_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]]
+
+// CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60
+// CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60
+// CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]]
+// CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20
+// CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20
+// CHECK-NEXT: [[ADD_9:%.+]] = sub i32 [[ADD_7]], [[MUL_8]]
+
+// CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60
+// CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60
+// CHECK-NEXT: [[ADD_10:%.+]] = sub i32 [[IV1_7]], [[MUL_9]]
+
+// CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]]
+// CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60
+// CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60
+// CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]]
+// CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20
+// CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20
+// CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[ADD_10]], [[MUL_5]]
+// CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5
+// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5
+// CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[ADD_9]], [[MUL_6]]
+// CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1
+// CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]]
// CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
// CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
// ... loop body ...
OpenPOWER on IntegriCloud