diff options
Diffstat (limited to 'polly/test')
223 files changed, 14168 insertions, 0 deletions
diff --git a/polly/test/AffineIterator/loop_static_bound.ll b/polly/test/AffineIterator/loop_static_bound.ll new file mode 100755 index 00000000000..a6193ae05b2 --- /dev/null +++ b/polly/test/AffineIterator/loop_static_bound.ll @@ -0,0 +1,24 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +define void @f(i32* nocapture %a) nounwind { +entry: + %0 = tail call i32 (...)* @rnd() nounwind ; <i32> [#uses=2] +; CHECK: 1 * %0 + 0 * 1 + %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ] ; <i32> [#uses=1] +; CHECK: 1 * {0,+,1}<nuw><nsw><%bb> + 0 * 1 + %2 = tail call i32 (...)* @rnd() nounwind ; <i32> [#uses=0] +; CHECK: 1 * %2 + 0 * 1 + %3 = add nsw i32 %i.03, 1 ; <i32> [#uses=2] +; CHECK: 1 * {0,+,1}<nuw><nsw><%bb> + 1 * 1 + %exitcond = icmp eq i32 %3, %0 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +declare i32 @rnd(...) 
diff --git a/polly/test/AffineIterator/no_tagetdata.ll b/polly/test/AffineIterator/no_tagetdata.ll new file mode 100755 index 00000000000..be4ee867a30 --- /dev/null +++ b/polly/test/AffineIterator/no_tagetdata.ll @@ -0,0 +1,20 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +define i32 @f(i64 %a, i64 %b, i64 %c, [8 x i32]* nocapture %x) nounwind readonly { +entry: + %0 = shl i64 %a, 1 ; <i64> [#uses=1] + %1 = add nsw i64 %0, %b ; <i64> [#uses=1] +; CHECK: 1 * %b + 2 * %a + 0 * 1 + %2 = shl i64 %1, 1 ; <i64> [#uses=1] +; CHECK: 2 * %b + 4 * %a + 0 * 1 + %3 = add i64 %2, 2 ; <i64> [#uses=1] + %4 = mul i64 %a, 3 ; <i64> [#uses=1] + %5 = shl i64 %b, 2 ; <i64> [#uses=1] + %6 = add nsw i64 %4, 2 ; <i64> [#uses=1] + %7 = add nsw i64 %6, %c ; <i64> [#uses=1] + %8 = add nsw i64 %7, %5 ; <i64> [#uses=1] + %9 = getelementptr inbounds [8 x i32]* %x, i64 %3, i64 %8 ; <i32*> [#uses=1] +; CHECK: 1 * %x + sizeof(i32) * %c + (35 * sizeof(i32)) * %a + (20 * sizeof(i32)) * %b + (18 * sizeof(i32)) * 1 + %10 = load i32* %9, align 4 ; <i32> [#uses=1] + ret i32 %10 +} diff --git a/polly/test/AffineIterator/no_tagetdata_loop.ll b/polly/test/AffineIterator/no_tagetdata_loop.ll new file mode 100755 index 00000000000..6af3b30af6c --- /dev/null +++ b/polly/test/AffineIterator/no_tagetdata_loop.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +define void @f([8 x i32]* nocapture %x) nounwind { +entry: + br label %bb5.preheader + +bb2: ; preds = %bb3.preheader, %bb2 + %k.09 = phi i64 [ 0, %bb3.preheader ], [ %1, %bb2 ] ; <i64> [#uses=2] + %tmp19 = add i64 %k.09, %tmp18 ; <i64> [#uses=1] + %scevgep = getelementptr [8 x i32]* %x, i64 2, i64 %tmp19 ; <i32*> [#uses=1] +; CHECK: sizeof(i32) * {0,+,1}<nuw><nsw><%bb2> + (20 * sizeof(i32)) * {0,+,1}<%bb3.preheader> + (35 * sizeof(i32)) * {0,+,1}<%bb5.preheader> + 1 * %x + (18 * sizeof(i32)) * 1 + %0 = tail call i32 (...)* @rnd() nounwind ; <i32> [#uses=1] 
+ store i32 %0, i32* %scevgep, align 4 + %1 = add nsw i64 %k.09, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %1, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %2 = add i64 %j.010, 1 ; <i64> [#uses=2] + %exitcond20 = icmp eq i64 %2, 64 ; <i1> [#uses=1] + br i1 %exitcond20, label %bb6, label %bb3.preheader + +bb3.preheader: ; preds = %bb5.preheader, %bb4 + %j.010 = phi i64 [ 0, %bb5.preheader ], [ %2, %bb4 ] ; <i64> [#uses=2] + %tmp21 = mul i64 %j.010, 20 ; <i64> [#uses=1] + %tmp18 = add i64 %tmp21, %tmp23 ; <i64> [#uses=1] + br label %bb2 + +bb6: ; preds = %bb4 + %3 = add i64 %i.012, 1 ; <i64> [#uses=2] + %exitcond25 = icmp eq i64 %3, 64 ; <i1> [#uses=1] + br i1 %exitcond25, label %return, label %bb5.preheader + +bb5.preheader: ; preds = %bb6, %entry + %i.012 = phi i64 [ 0, %entry ], [ %3, %bb6 ] ; <i64> [#uses=2] + %tmp = mul i64 %i.012, 35 ; <i64> [#uses=1] + %tmp23 = add i64 %tmp, 2 ; <i64> [#uses=1] + br label %bb3.preheader + +return: ; preds = %bb6 + ret void +} + +declare i32 @rnd(...) 
diff --git a/polly/test/AffineIterator/simple_0.ll b/polly/test/AffineIterator/simple_0.ll new file mode 100755 index 00000000000..1c1e836d6ba --- /dev/null +++ b/polly/test/AffineIterator/simple_0.ll @@ -0,0 +1,20 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +define i32 @f(i32 %a, i32 %b, i32 %c, i32 %d, i32* nocapture %x) nounwind readnone { +entry: + %0 = shl i32 %a, 1 ; <i32> [#uses=1] +; CHECK: 2 * %a + 0 * 1 + %1 = mul i32 %b, 3 ; <i32> [#uses=1] +; CHECK: 3 * %b + 0 * 1 + %2 = shl i32 %d, 2 ; <i32> [#uses=1] +; CHECK: 4 * %d + 0 * 1 + %3 = add nsw i32 %0, 5 ; <i32> [#uses=1] +; CHECK: 2 * %a + 5 * 1 + %4 = add nsw i32 %3, %c ; <i32> [#uses=1] +; CHECK: 1 * %c + 2 * %a + 5 * 1 + %5 = add nsw i32 %4, %1 ; <i32> [#uses=1] +; CHECK: 1 * %c + 3 * %b + 2 * %a + 5 * 1 + %6 = add nsw i32 %5, %2 ; <i32> [#uses=1] +; CHECK: 1 * %c + 4 * %d + 3 * %b + 2 * %a + 5 * 1 + ret i32 %6 +} diff --git a/polly/test/AffineIterator/simple_1.ll b/polly/test/AffineIterator/simple_1.ll new file mode 100755 index 00000000000..36f57d554ba --- /dev/null +++ b/polly/test/AffineIterator/simple_1.ll @@ -0,0 +1,24 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @f(i32 %a, i32 %b, i32 %c, i64 %d, i8 signext %e, i32 %f, i32 %g, i32 %h) nounwind readnone { +entry: + %0 = mul i32 %a, 3 ; <i32> [#uses=1] + %1 = mul i32 %b, 5 ; <i32> [#uses=1] + %2 = mul i32 %1, %c ; <i32> [#uses=1] +; CHECK: 5 * (%b * %c) + 0 * 1 + %3 = mul i32 %2, %f ; <i32> [#uses=1] +; CHECK: 5 * (%b * %c * %f) + 0 * 1 + %4 = sext i8 %e to i32 ; <i32> [#uses=1] + %5 = shl i32 %4, 2 ; <i32> [#uses=1] + %6 = trunc i64 %d to i32 ; <i32> [#uses=1] + %7 = mul i32 %6, %h ; <i32> [#uses=1] + %8 = add nsw i32 %0, %g ; <i32> [#uses=1] + %9 = add 
nsw i32 %8, %5 ; <i32> [#uses=1] + %10 = add nsw i32 %9, %3 ; <i32> [#uses=1] + %11 = add nsw i32 %10, %7 ; <i32> [#uses=1] +; CHECK: 1 * %g + 1 * ((trunc i64 %d to i32) * %h) + 5 * (%b * %c * %f) + 4 * (sext i8 %e to i32) + 3 * %a + 0 * 1 + ret i32 %11 +} diff --git a/polly/test/AffineIterator/simple_loop.ll b/polly/test/AffineIterator/simple_loop.ll new file mode 100755 index 00000000000..e374b1c2da9 --- /dev/null +++ b/polly/test/AffineIterator/simple_loop.ll @@ -0,0 +1,25 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @f(i32 %a, i32 %b, i32 %c, i32 %d, i32* nocapture %x) nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb ] ; <i64> [#uses=3] +; CHECK: 1 * {0,+,1}<%bb> + 0 * 1 + %scevgep = getelementptr i32* %x, i64 %indvar ; <i32*> [#uses=1] +; CHECK: 4 * {0,+,1}<%bb> + 1 * %x + 0 * 1 + %i.04 = trunc i64 %indvar to i32 ; <i32> [#uses=1] +; CHECK: 1 * {0,+,1}<%bb> + 0 * 1 + store i32 %i.04, i32* %scevgep, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] +; CHECK: 1 * {0,+,1}<%bb> + 1 * 1 + %exitcond = icmp eq i64 %indvar.next, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb + ret i32 %a +} diff --git a/polly/test/AffineIterator/simple_nest.ll b/polly/test/AffineIterator/simple_nest.ll new file mode 100755 index 00000000000..c313e247e8e --- /dev/null +++ b/polly/test/AffineIterator/simple_nest.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly %defaultOpts -print-scev-affine -analyze < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 
@f(i32 %a, i32 %b, i32 %c, i32 %d, [4 x i32]* nocapture %x) nounwind { +entry: + br label %bb2.preheader + +bb1: ; preds = %bb2.preheader, %bb1 + %indvar = phi i64 [ 0, %bb2.preheader ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3] +; CHECK: 1 * {0,+,1}<%bb1> + 0 * 1 + %scevgep = getelementptr [4 x i32]* %x, i64 %indvar, i64 %0 ; <i32*> [#uses=1] +; CHECK: 16 * {0,+,1}<%bb1> + 4 * {0,+,1}<%bb2.preheader> + 1 * %x + 0 * 1 + %tmp = mul i64 %indvar, %0 ; <i64> [#uses=1] +; CHECK: 1 * {0,+,{0,+,1}<%bb2.preheader>}<%bb1> + 0 * 1 + %tmp13 = trunc i64 %tmp to i32 ; <i32> [#uses=1] +; CHECK: 1 * {0,+,{0,+,1}<%bb2.preheader>}<%bb1> + 0 * 1 + store i32 %tmp13, i32* %scevgep, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] +; CHECK: 1 * {0,+,1}<%bb1> + 1 * 1 + %exitcond = icmp eq i64 %indvar.next, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %indvar.next12 = add i64 %0, 1 ; <i64> [#uses=2] +; CHECK: 1 * {0,+,1}<%bb2.preheader> + 1 * 1 + %exitcond14 = icmp eq i64 %indvar.next12, 64 ; <i1> [#uses=1] + br i1 %exitcond14, label %bb5, label %bb2.preheader + +bb2.preheader: ; preds = %bb3, %entry + %0 = phi i64 [ 0, %entry ], [ %indvar.next12, %bb3 ] ; <i64> [#uses=3] +; CHECK: 1 * {0,+,1}<%bb2.preheader> + 0 * 1 + br label %bb1 + +bb5: ; preds = %bb3 + ret i32 %a +} diff --git a/polly/test/CMakeLists.txt b/polly/test/CMakeLists.txt new file mode 100644 index 00000000000..a3b32de0ca3 --- /dev/null +++ b/polly/test/CMakeLists.txt @@ -0,0 +1,52 @@ +set(POLLY_TEST_DIRECTORIES + "ScopInfo" + "AffineIterator" + "CodeGen" + "OpenMP" + "polybench") + +set(LLVM_SOURCE_DIR "${LLVM_MAIN_SRC_DIR}") +set(LLVM_BINARY_DIR "${LLVM_BINARY_DIR}") +set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/") +set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib") +set(POLLY_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..") +set(POLLY_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/..") + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in + 
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg) + +include(FindPythonInterp) +if(PYTHONINTERP_FOUND) + set(POLLY_TEST_EXTRA_ARGS) + if (MSVC OR XCODE) + set(POLLY_TEST_EXTRA_ARGS "--no-progress-bar") + endif() + + option(POLLY_TEST_USE_VG "Run Polly tests under Valgrind" OFF) + if(POLLY_TEST_USE_VG) + set(POLLY_TEST_EXTRA_ARGS ${POLLY_TEST_EXTRA_ARGS} "--vg") + endif () + + foreach(testdir ${POLLY_TEST_DIRECTORIES}) + add_custom_target(polly-test-${testdir} + COMMAND ${PYTHON_EXECUTABLE} + ${LLVM_SOURCE_DIR}/utils/lit/lit.py + --param polly_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + --param build_config=${CMAKE_CFG_INTDIR} + -sv ${POLLY_TEST_EXTRA_ARGS} + ${CMAKE_CURRENT_BINARY_DIR}/${testdir} + DEPENDS opt LLVMPolly + COMMENT "Running Polly regression tests in ${testdir}") + endforeach() + + add_custom_target(polly-test + COMMAND ${PYTHON_EXECUTABLE} + ${LLVM_SOURCE_DIR}/utils/lit/lit.py + --param polly_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + --param build_config=${CMAKE_CFG_INTDIR} + -sv ${POLLY_TEST_EXTRA_ARGS} + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS opt LLVMPolly + COMMENT "Running Polly regression tests") +endif() diff --git a/polly/test/CodeGen/20100617.ll b/polly/test/CodeGen/20100617.ll new file mode 100644 index 00000000000..6c76e84fd97 --- /dev/null +++ b/polly/test/CodeGen/20100617.ll @@ -0,0 +1,20 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = 'a' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @init_array() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond1, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.cond1 ], [ 0, %entry ] ; <i64> [#uses=1] + br i1 false, label %for.cond1, label %for.end32 + +for.cond1: ; preds = %for.cond + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond + 
+for.end32: ; preds = %for.cond + ret void +} diff --git a/polly/test/CodeGen/20100622.ll b/polly/test/CodeGen/20100622.ll new file mode 100644 index 00000000000..d2d1e47c639 --- /dev/null +++ b/polly/test/CodeGen/20100622.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | not FileCheck %s + +; ModuleID = 'a' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-portbld-freebsd8.0" + +define void @MAIN__() nounwind { +entry: + br i1 undef, label %bb6.preheader, label %bb3 + +bb3: ; preds = %bb3, %entry + br i1 undef, label %bb6.preheader, label %bb3 + +bb6.preheader: ; preds = %bb3, %entry + br i1 undef, label %bb11, label %bb9.preheader + +bb9.preheader: ; preds = %bb6.preheader + br label %bb11 + +bb11: ; preds = %bb9.preheader, %bb6.preheader + br label %bb15 + +bb15: ; preds = %bb15, %bb11 + br i1 undef, label %bb26.loopexit, label %bb15 + +bb26.loopexit: ; preds = %bb15 + br i1 undef, label %bb31, label %bb29.preheader + +bb29.preheader: ; preds = %bb26.loopexit + br label %bb29 + +bb29: ; preds = %bb29, %bb29.preheader + %indvar47 = phi i32 [ 0, %bb29.preheader ], [ %indvar.next48, %bb29 ] ; <i32> [#uses=1] + %indvar.next48 = add i32 %indvar47, 1 ; <i32> [#uses=2] + %exitcond50 = icmp eq i32 %indvar.next48, undef ; <i1> [#uses=1] + br i1 %exitcond50, label %bb31, label %bb29 + +bb31: ; preds = %bb29, %bb26.loopexit + %errtot.3 = phi float [ undef, %bb26.loopexit ], [ undef, %bb29 ] ; <float> [#uses=0] + ret void +} + +; CHECK: SCOP: diff --git a/polly/test/CodeGen/20100707.ll b/polly/test/CodeGen/20100707.ll new file mode 100644 index 00000000000..c427cfe2c65 --- /dev/null +++ b/polly/test/CodeGen/20100707.ll @@ -0,0 +1,28 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = 'a' +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @clause_SetSplitField(i32 %Length) nounwind inlinehint { +entry: + br i1 undef, label %bb1, label %bb6 + +bb1: ; preds = %entry + unreachable + +bb6: ; preds = %entry + %tmp = zext i32 %Length to i64 ; <i64> [#uses=1] + br label %bb8 + +bb7: ; preds = %bb8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb8 + +bb8: ; preds = %bb7, %bb6 + %indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb6 ] ; <i64> [#uses=2] + %exitcond = icmp ne i64 %indvar, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %bb7, label %return + +return: ; preds = %bb8 + ret void +} diff --git a/polly/test/CodeGen/20100707_2.ll b/polly/test/CodeGen/20100707_2.ll new file mode 100644 index 00000000000..54b78a6d85c --- /dev/null +++ b/polly/test/CodeGen/20100707_2.ll @@ -0,0 +1,116 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@win193 = external global [4 x [36 x double]], align 32 ; <[4 x [36 x double]]*> [#uses=3] +@sb_sample = external global [2 x [2 x [18 x [32 x double]]]], align 32 ; <[2 x [2 x [18 x [32 x double]]]]*> [#uses=2] + +define void @mdct_sub48() nounwind { +entry: + br i1 undef, label %bb, label %bb54 + +bb: ; preds = %entry + br label %bb54 + +bb3: ; preds = %bb50 + br label %bb8 + +bb4: ; preds = %bb8 + br label %bb8 + +bb8: ; preds = %bb4, %bb3 + br i1 undef, label %bb4, label %bb9 + +bb9: ; preds = %bb8 + br label %bb48 + +bb25: ; preds = %bb48 + br i1 false, label %bb26, label %bb27 + +bb26: ; preds = %bb48, %bb25 + br label %bb37 + +bb27: ; preds = %bb25 + br i1 undef, label %bb32, label %bb35 + 
+bb32: ; preds = %bb27 + br label %bb37 + +bb34: ; preds = %bb35 + %0 = getelementptr inbounds [36 x double]* undef, i64 0, i64 0 ; <double*> [#uses=0] + %1 = getelementptr inbounds [18 x [32 x double]]* undef, i64 0, i64 0 ; <[32 x double]*> [#uses=1] + %2 = getelementptr inbounds [32 x double]* %1, i64 0, i64 0 ; <double*> [#uses=0] + %3 = getelementptr inbounds [36 x double]* undef, i64 0, i64 0 ; <double*> [#uses=0] + %4 = sub nsw i32 17, %k.4 ; <i32> [#uses=1] + %5 = getelementptr inbounds [2 x [2 x [18 x [32 x double]]]]* @sb_sample, i64 0, i64 0 ; <[2 x [18 x [32 x double]]]*> [#uses=1] + %6 = getelementptr inbounds [2 x [18 x [32 x double]]]* %5, i64 0, i64 0 ; <[18 x [32 x double]]*> [#uses=1] + %7 = sext i32 %4 to i64 ; <i64> [#uses=1] + %8 = getelementptr inbounds [18 x [32 x double]]* %6, i64 0, i64 %7 ; <[32 x double]*> [#uses=1] + %9 = getelementptr inbounds [32 x double]* %8, i64 0, i64 0 ; <double*> [#uses=1] + %10 = load double* %9, align 8 ; <double> [#uses=0] + %11 = fsub double 0.000000e+00, undef ; <double> [#uses=1] + %12 = getelementptr inbounds double* getelementptr inbounds ([4 x [36 x double]]* @win193, i64 0, i64 2, i64 4), i64 0 ; <double*> [#uses=1] + store double %11, double* %12, align 8 + %13 = add nsw i32 %k.4, 9 ; <i32> [#uses=1] + %14 = add nsw i32 %k.4, 18 ; <i32> [#uses=1] + %15 = getelementptr inbounds [4 x [36 x double]]* @win193, i64 0, i64 0 ; <[36 x double]*> [#uses=1] + %16 = sext i32 %14 to i64 ; <i64> [#uses=1] + %17 = getelementptr inbounds [36 x double]* %15, i64 0, i64 %16 ; <double*> [#uses=1] + %18 = load double* %17, align 8 ; <double> [#uses=0] + %19 = sext i32 %k.4 to i64 ; <i64> [#uses=1] + %20 = getelementptr inbounds [18 x [32 x double]]* undef, i64 0, i64 %19 ; <[32 x double]*> [#uses=1] + %21 = sext i32 %band.2 to i64 ; <i64> [#uses=1] + %22 = getelementptr inbounds [32 x double]* %20, i64 0, i64 %21 ; <double*> [#uses=1] + %23 = load double* %22, align 8 ; <double> [#uses=0] + %24 = sext i32 %39 to i64 ; 
<i64> [#uses=1] + %25 = getelementptr inbounds [4 x [36 x double]]* @win193, i64 0, i64 %24 ; <[36 x double]*> [#uses=1] + %26 = getelementptr inbounds [36 x double]* %25, i64 0, i64 0 ; <double*> [#uses=1] + %27 = load double* %26, align 8 ; <double> [#uses=0] + %28 = sub nsw i32 17, %k.4 ; <i32> [#uses=1] + %29 = getelementptr inbounds [2 x [2 x [18 x [32 x double]]]]* @sb_sample, i64 0, i64 0 ; <[2 x [18 x [32 x double]]]*> [#uses=1] + %30 = getelementptr inbounds [2 x [18 x [32 x double]]]* %29, i64 0, i64 0 ; <[18 x [32 x double]]*> [#uses=1] + %31 = sext i32 %28 to i64 ; <i64> [#uses=1] + %32 = getelementptr inbounds [18 x [32 x double]]* %30, i64 0, i64 %31 ; <[32 x double]*> [#uses=1] + %33 = getelementptr inbounds [32 x double]* %32, i64 0, i64 0 ; <double*> [#uses=1] + %34 = load double* %33, align 8 ; <double> [#uses=0] + %35 = sext i32 %13 to i64 ; <i64> [#uses=1] + %36 = getelementptr inbounds double* getelementptr inbounds ([4 x [36 x double]]* @win193, i64 0, i64 2, i64 4), i64 %35 ; <double*> [#uses=1] + store double 0.000000e+00, double* %36, align 8 + %37 = sub nsw i32 %k.4, 1 ; <i32> [#uses=1] + br label %bb35 + +bb35: ; preds = %bb34, %bb27 + %k.4 = phi i32 [ %37, %bb34 ], [ 8, %bb27 ] ; <i32> [#uses=6] + br i1 undef, label %bb34, label %bb36 + +bb36: ; preds = %bb35 + unreachable + +bb37: ; preds = %bb32, %bb26 + %38 = add nsw i32 %band.2, 1 ; <i32> [#uses=1] + br label %bb48 + +bb48: ; preds = %bb37, %bb9 + %band.2 = phi i32 [ %38, %bb37 ], [ 0, %bb9 ] ; <i32> [#uses=2] + %39 = load i32* null, align 8 ; <i32> [#uses=1] + br i1 undef, label %bb26, label %bb25 + +bb50: ; preds = %bb54 + br i1 undef, label %bb3, label %bb51 + +bb51: ; preds = %bb50 + br i1 undef, label %bb52, label %bb53 + +bb52: ; preds = %bb51 + unreachable + +bb53: ; preds = %bb51 + br label %bb54 + +bb54: ; preds = %bb53, %bb, %entry + br i1 undef, label %bb50, label %return + +return: ; preds = %bb54 + ret void +} diff --git a/polly/test/CodeGen/20100708.ll 
b/polly/test/CodeGen/20100708.ll new file mode 100644 index 00000000000..e10f3764d60 --- /dev/null +++ b/polly/test/CodeGen/20100708.ll @@ -0,0 +1,19 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect < %s +; ModuleID = '/tmp/bug.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define fastcc void @execute() nounwind { +entry: + br i1 undef, label %check_stack.exit456.thread, label %bb.i451.preheader + +bb.i451.preheader: ; preds = %bb116 + br label %bb.i451 + +bb.i451: ; preds = %bb.i451, %bb.i451.preheader + br label %bb.i451 + +check_stack.exit456.thread: ; preds = %bb116 + unreachable + +} diff --git a/polly/test/CodeGen/20100708_2.ll b/polly/test/CodeGen/20100708_2.ll new file mode 100644 index 00000000000..2dda1659104 --- /dev/null +++ b/polly/test/CodeGen/20100708_2.ll @@ -0,0 +1,29 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = '/home/grosser/Projekte/polly/git/tools/polly/test/CodeGen/20100708_2.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +define void @init_array() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + br i1 undef, label %bb2, label %bb5 + +bb2: ; preds = %bb3, %bb1 + %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb1 ] ; <i64> [#uses=1] + %tmp3 = trunc i64 undef to i32 ; <i32> [#uses=1] + br i1 false, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + %tmp = srem i32 %tmp3, 1024 ; <i32> [#uses=0] + store double undef, double* undef + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb2 + +bb4: ; preds = %bb2 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} diff --git a/polly/test/CodeGen/20100713.ll b/polly/test/CodeGen/20100713.ll new file mode 100644 index 
00000000000..612750eb366 --- /dev/null +++ b/polly/test/CodeGen/20100713.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @fft_float(i32 %NumSamples) nounwind { + br label %bb18 + +bb18: ; preds = %bb17 + br i1 false, label %bb19, label %bb22 + +bb19: ; preds = %bb18 + %a = uitofp i32 %NumSamples to double ; <double> [#uses=1] + br label %bb21 + +bb20: ; preds = %bb21 + %1 = load float* undef, align 4 ; <float> [#uses=0] + %2 = fpext float undef to double ; <double> [#uses=1] + %3 = fdiv double %2, %a ; <double> [#uses=0] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb21 + +bb21: ; preds = %bb20, %bb19 + %indvar = phi i64 [ %indvar.next, %bb20 ], [ 0, %bb19 ] ; <i64> [#uses=1] + br i1 false, label %bb20, label %bb22.loopexit + +bb22.loopexit: ; preds = %bb21 + br label %bb22 + +bb22: ; preds = %bb22.loopexit, %bb18 + br label %return + +return: ; preds = %bb22 + ret void +} diff --git a/polly/test/CodeGen/20100713_2.ll b/polly/test/CodeGen/20100713_2.ll new file mode 100644 index 00000000000..e4065cd6143 --- /dev/null +++ b/polly/test/CodeGen/20100713_2.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define hidden void @luaD_callhook() nounwind { +entry: + br i1 undef, label %bb, label %return + +bb: ; preds = %entry + br i1 undef, label %bb1, label %return + +bb1: ; preds = %bb + %0 = sub nsw i64 undef, undef ; <i64> [#uses=1] + br i1 false, label %bb2, label %bb3 + +bb2: 
; preds = %bb1 + br label %bb4 + +bb3: ; preds = %bb1 + br label %bb4 + +bb4: ; preds = %bb3, %bb2 + br i1 undef, label %bb5, label %bb6 + +bb5: ; preds = %bb4 + unreachable + +bb6: ; preds = %bb4 + %1 = getelementptr inbounds i8* undef, i64 %0 ; <i8*> [#uses=0] + ret void + +return: ; preds = %bb, %entry + ret void +} diff --git a/polly/test/CodeGen/20100717.ll b/polly/test/CodeGen/20100717.ll new file mode 100644 index 00000000000..c97a2eab489 --- /dev/null +++ b/polly/test/CodeGen/20100717.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -disable-output < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @matrixTranspose(double** %A) nounwind { +entry: + br label %bb4 + +bb: ; preds = %bb4 + %0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb2 + +bb1: ; preds = %bb2 + %1 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0] + %2 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0] + %3 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0] + %4 = sext i32 %j.0 to i64 ; <i64> [#uses=1] + %5 = getelementptr inbounds double** %A, i64 %4 ; <double**> [#uses=1] + %6 = load double** %5, align 8 ; <double*> [#uses=0] + %7 = add nsw i32 %j.0, 1 ; <i32> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %j.0 = phi i32 [ %0, %bb ], [ %7, %bb1 ] ; <i32> [#uses=3] + %8 = icmp sle i32 %j.0, 50 ; <i1> [#uses=1] + br i1 %8, label %bb1, label %bb3 + +bb3: ; preds = %bb2 + %9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1] + br label %bb4 + +bb4: ; preds = %bb3, %entry + %i.0 = phi i32 [ 0, %entry ], [ %9, %bb3 ] ; <i32> [#uses=3] + %10 = icmp sle i32 %i.0, 50 ; <i1> [#uses=1] + br i1 %10, label %bb, label %return + +return: ; preds = %bb4 + ret void +} diff --git 
a/polly/test/CodeGen/20100718-DomInfo-2.ll b/polly/test/CodeGen/20100718-DomInfo-2.ll new file mode 100644 index 00000000000..67279ec2265 --- /dev/null +++ b/polly/test/CodeGen/20100718-DomInfo-2.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -verify-dom-info -disable-output < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @getNonAffNeighbour() nounwind { +entry: + br i1 undef, label %bb, label %bb6 + +bb: ; preds = %entry + br i1 false, label %bb1, label %bb2 + +bb1: ; preds = %bb + br label %bb16 + +bb2: ; preds = %bb + br i1 false, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + br label %bb16 + +bb4: ; preds = %bb2 + br label %bb16 + +bb6: ; preds = %entry + br i1 false, label %bb7, label %bb9 + +bb7: ; preds = %bb6 + br label %bb16 + +bb9: ; preds = %bb6 + br label %bb16 + +bb16: ; preds = %bb9, %bb7, %bb4, %bb3, %bb1 + ret void +} diff --git a/polly/test/CodeGen/20100718-DomInfo.ll b/polly/test/CodeGen/20100718-DomInfo.ll new file mode 100644 index 00000000000..fdb525f9a03 --- /dev/null +++ b/polly/test/CodeGen/20100718-DomInfo.ll @@ -0,0 +1,29 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -verify-dom-info -disable-output < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @intrapred_luma_16x16(i32 %predmode) nounwind { +entry: + switch i32 %predmode, label %bb81 [ + i32 0, label %bb25 + i32 1, label %bb26 + ] + +bb23: ; preds = %bb25 + %indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=1] + br label %bb25 + +bb25: ; preds = %bb23, %entry + %indvar94 = phi i64 [ %indvar.next95, %bb23 
], [ 0, %entry ] ; <i64> [#uses=1] + br i1 false, label %bb23, label %return + +bb26: ; preds = %entry + ret void + +bb81: ; preds = %entry + ret void + +return: ; preds = %bb25 + ret void +} diff --git a/polly/test/CodeGen/20100720-MultipleConditions.c b/polly/test/CodeGen/20100720-MultipleConditions.c new file mode 100644 index 00000000000..15d6d56c0b5 --- /dev/null +++ b/polly/test/CodeGen/20100720-MultipleConditions.c @@ -0,0 +1,23 @@ +int bar1(); +int bar2(); +int bar3(); +int k; +#define N 100 +int A[N]; + +int main() { + int i, j, z; + + __sync_synchronize(); + for (i = 0; i < N; i++) { + if (i < 50) + A[i] = 8; + if (i < 4) + A[i] = 9; + if (i < 3) + A[i] = 10; + } + __sync_synchronize(); + + return A[z]; +} diff --git a/polly/test/CodeGen/20100720-MultipleConditions.ll b/polly/test/CodeGen/20100720-MultipleConditions.ll new file mode 100644 index 00000000000..3d1e4429760 --- /dev/null +++ b/polly/test/CodeGen/20100720-MultipleConditions.ll @@ -0,0 +1,74 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s +; ModuleID = '20100720-MultipleConditions.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [100 x i32] zeroinitializer, align 16 ; <[100 x i32]*> [#uses=2] +@k = common global i32 0, align 4 ; <i32*> [#uses=0] + +define i32 @main() nounwind { +; <label>:0 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %1 + +; <label>:1 ; preds = %12, %0 + %indvar = phi i64 [ %indvar.next, %12 ], [ 0, %0 ] ; <i64> [#uses=4] + %scevgep = getelementptr [100 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=3] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=3] + %exitcond = icmp ne i64 %indvar, 100 ; <i1> [#uses=1] + br i1 %exitcond, label %2, label %13 + +; <label>:2 ; preds = %1 + %3 = icmp slt i32 %i.0, 50 ; <i1> [#uses=1] + br i1 %3, label 
%4, label %5 + +; <label>:4 ; preds = %2 + store i32 8, i32* %scevgep + br label %5 + +; <label>:5 ; preds = %4, %2 + %6 = icmp slt i32 %i.0, 4 ; <i1> [#uses=1] + br i1 %6, label %7, label %8 + +; <label>:7 ; preds = %5 + store i32 9, i32* %scevgep + br label %8 + +; <label>:8 ; preds = %7, %5 + %9 = icmp slt i32 %i.0, 3 ; <i1> [#uses=1] + br i1 %9, label %10, label %11 + +; <label>:10 ; preds = %8 + store i32 10, i32* %scevgep + br label %11 + +; <label>:11 ; preds = %10, %8 + br label %12 + +; <label>:12 ; preds = %11 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:13 ; preds = %1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %14 = sext i32 undef to i64 ; <i64> [#uses=1] + %15 = getelementptr inbounds i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0), i64 %14 ; <i32*> [#uses=1] + %16 = load i32* %15 ; <i32> [#uses=1] + ret i32 %16 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK: for (c2=0;c2<=2;c2++) { +; CHECK: S0(c2); +; CHECK: S1(c2); +; CHECK: S2(c2); +; CHECK: } +; CHECK: S0(3); +; CHECK: S1(3); +; CHECK: for (c2=4;c2<=49;c2++) { +; CHECK: S0(c2); +; CHECK: } +; CHECK: S0: Stmt_4 +; CHECK: S1: Stmt_7 +; CHECK: S2: Stmt_10 +; CHECK: diff --git a/polly/test/CodeGen/20100809-IndependentBlock.ll b/polly/test/CodeGen/20100809-IndependentBlock.ll new file mode 100644 index 00000000000..bd440ec56e5 --- /dev/null +++ b/polly/test/CodeGen/20100809-IndependentBlock.ll @@ -0,0 +1,31 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -disable-output %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define void @cfft2([2 x float]* %x) nounwind { +entry: + %d.1.reg2mem = alloca [2 x float]* ; <[2 x float]**> [#uses=3] + br i1 undef, label %bb2, label %bb34 + +bb2: ; preds = %bb34, %entry + ret void + 
+bb20: ; preds = %bb34 + store [2 x float]* undef, [2 x float]** %d.1.reg2mem + br i1 false, label %bb21, label %bb23 + +bb21: ; preds = %bb20 + %0 = getelementptr inbounds [2 x float]* %x, i64 undef ; <[2 x float]*> [#uses=1] + store [2 x float]* %0, [2 x float]** %d.1.reg2mem + br label %bb23 + +bb23: ; preds = %bb21, %bb20 + %d.1.reload = load [2 x float]** %d.1.reg2mem ; <[2 x float]*> [#uses=1] + br i1 undef, label %bb29, label %bb34 + +bb29: ; preds = %bb23 + %1 = getelementptr inbounds [2 x float]* %d.1.reload, i64 undef ; <[2 x float]*> [#uses=0] + br label %bb34 + +bb34: ; preds = %bb29, %bb23, %entry + br i1 undef, label %bb20, label %bb2 +} diff --git a/polly/test/CodeGen/20100811-ScalarDependencyBetweenBrAndCnd.ll b/polly/test/CodeGen/20100811-ScalarDependencyBetweenBrAndCnd.ll new file mode 100644 index 00000000000..7421a767a1b --- /dev/null +++ b/polly/test/CodeGen/20100811-ScalarDependencyBetweenBrAndCnd.ll @@ -0,0 +1,30 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -disable-output %s +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] ; <i64> [#uses=2] + %exitcond = icmp ne i64 %indvar1, 1024 ; <i1> [#uses=1] + br label %a + +a: ; preds = %for.cond + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %a + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %a + br label %for.cond5 + +for.cond5: ; preds = %for.inc17, %for.end + ret void +} diff --git a/polly/test/CodeGen/20101030-Overflow.ll b/polly/test/CodeGen/20101030-Overflow.ll new file mode 100644 index 00000000000..e8f1a9a1049 --- /dev/null +++ 
b/polly/test/CodeGen/20101030-Overflow.ll @@ -0,0 +1,22 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @compdecomp() nounwind { +entry: + %max = alloca i64 + %i = load i64* undef + br label %bb37 + +bb37: ; preds = %bb36, %bb28 + %tmp = icmp ugt i64 %i, 0 + br i1 %tmp, label %bb38, label %bb39 + +bb38: ; preds = %bb37 + store i64 %i, i64* %max + br label %bb39 + +bb39: ; preds = %bb38, %bb37 + unreachable + +} diff --git a/polly/test/CodeGen/20101103-Overflow3.ll b/polly/test/CodeGen/20101103-Overflow3.ll new file mode 100644 index 00000000000..b56df93a07b --- /dev/null +++ b/polly/test/CodeGen/20101103-Overflow3.ll @@ -0,0 +1,24 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define void @Reflection_coefficients(i16* %r) nounwind { +bb20: + %indvar3.lcssa20.reload = load i64* undef + %tmp = mul i64 %indvar3.lcssa20.reload, -1 + %tmp5 = add i64 %tmp, 8 + br label %bb22 + +bb21: ; preds = %bb22 + %r_addr.1.moved.to.bb21 = getelementptr i16* %r, i64 0 + store i16 0, i16* %r_addr.1.moved.to.bb21, align 2 + %indvar.next = add i64 %indvar, 1 + br label %bb22 + +bb22: ; preds = %bb21, %bb20 + %indvar = phi i64 [ %indvar.next, %bb21 ], [ 0, %bb20 ] + %exitcond = icmp ne i64 %indvar, %tmp5 + br i1 %exitcond, label %bb21, label %return + +return: ; preds = %bb22 + ret void +} diff --git a/polly/test/CodeGen/20101103-signmissmatch.ll b/polly/test/CodeGen/20101103-signmissmatch.ll new file mode 100644 index 00000000000..5821a26e0a0 --- /dev/null +++ b/polly/test/CodeGen/20101103-signmissmatch.ll @@ -0,0 +1,38 @@ +; RUN: opt 
%loadPolly %defaultOpts -polly-codegen %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +define void @CleanNet() nounwind { +entry: + %firstVia.0.reg2mem = alloca i64 + br label %bb7 + +bb7: ; preds = %bb7, %entry + br i1 undef, label %bb7, label %bb8 + +bb8: ; preds = %bb7 + %indvar5.lcssa.reload = load i64* undef + %tmp17 = mul i64 %indvar5.lcssa.reload, -1 + %tmp18 = add i64 0, %tmp17 + br label %bb18 + +bb13: ; preds = %bb18 + %0 = icmp ult i64 %i.1, 0 + br i1 %0, label %bb14, label %bb17 + +bb14: ; preds = %bb13 + store i64 %i.1, i64* %firstVia.0.reg2mem + br label %bb17 + +bb17: ; preds = %bb14, %bb13 + %indvar.next16 = add i64 %indvar15, 1 + br label %bb18 + +bb18: ; preds = %bb17, %bb8 + %indvar15 = phi i64 [ %indvar.next16, %bb17 ], [ 0, %bb8 ] + %i.1 = add i64 %tmp18, %indvar15 + br i1 undef, label %bb13, label %bb25 + +bb25: ; preds = %bb18 + ret void +} diff --git a/polly/test/CodeGen/20110226-Ignore-Dead-Code.ll b/polly/test/CodeGen/20110226-Ignore-Dead-Code.ll new file mode 100644 index 00000000000..fb88717aee9 --- /dev/null +++ b/polly/test/CodeGen/20110226-Ignore-Dead-Code.ll @@ -0,0 +1,60 @@ +; ModuleID = '20110226-Ignore-dead-code.ll' +; RUN: opt %loadPolly %defaultOpts -polly-codegen %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @main() nounwind { +.split: + br label %0 + +.loopexit.loopexit: ; preds = %.preheader.us + br label %.loopexit.simregexit + +.loopexit.simregexit: ; preds = %.loopexit.loopexit, %._crit_edge + br label %.loopexit + +.loopexit: ; preds = %.loopexit.simregexit + %indvar.next16 = add i64 %indvar15, 1 + %exitcond53 = icmp eq i64 %indvar.next16, 2048 + br i1 %exitcond53, label %2, label %0 + 
+; <label>:0 ; preds = %.loopexit, %.split + %indvar15 = phi i64 [ 0, %.split ], [ %indvar.next16, %.loopexit ] + br label %.simregentry + +.simregentry: ; preds = %0 + %indvar15.ph = phi i64 [ %indvar15, %0 ] + %tmp67 = add i64 %indvar15, 1 + %i.06 = trunc i64 %tmp67 to i32 + %tmp25 = add i64 undef, 1 + %1 = icmp slt i32 %i.06, 2048 + br i1 %1, label %.lr.ph.preheader, label %._crit_edge.simregexit + +.lr.ph.preheader: ; preds = %.simregentry + br label %.lr.ph + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvar33 = phi i64 [ %indvar.next34, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %indvar.next34 = add i64 %indvar33, 1 + %exitcond40 = icmp eq i64 %indvar.next34, 0 + br i1 %exitcond40, label %._crit_edge.loopexit, label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge.simregexit + +._crit_edge.simregexit: ; preds = %.simregentry, %._crit_edge.loopexit + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.simregexit + br i1 false, label %.loopexit.simregexit, label %.preheader.us.preheader + +.preheader.us.preheader: ; preds = %._crit_edge + br label %.preheader.us + +.preheader.us: ; preds = %.preheader.us, %.preheader.us.preheader + %exitcond26.old = icmp eq i64 undef, %tmp25 + br i1 false, label %.loopexit.loopexit, label %.preheader.us + +; <label>:2 ; preds = %.loopexit + ret void +} diff --git a/polly/test/CodeGen/20110226-PHI-Node-removed.ll b/polly/test/CodeGen/20110226-PHI-Node-removed.ll new file mode 100644 index 00000000000..4c68bf58cc6 --- /dev/null +++ b/polly/test/CodeGen/20110226-PHI-Node-removed.ll @@ -0,0 +1,30 @@ +; ModuleID = '20110226.ll' +; RUN: opt %loadPolly %defaultOpts -polly-codegen %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @main() nounwind { +.split: + br label %0 + +.loopexit: ; preds = %.lr.ph, %0 + %indvar.next16 = 
add i64 %indvar15, 1 + %exitcond53 = icmp eq i64 %indvar.next16, 2048 + br i1 %exitcond53, label %1, label %0 + +; <label>:0 ; preds = %.loopexit, %.split + %indvar15 = phi i64 [ 0, %.split ], [ %indvar.next16, %.loopexit ] + %tmp59 = sub i64 2046, %indvar15 + %tmp38 = and i64 %tmp59, 4294967295 + %tmp39 = add i64 %tmp38, 1 + br i1 false, label %.lr.ph, label %.loopexit + +.lr.ph: ; preds = %.lr.ph, %0 + %indvar33 = phi i64 [ %indvar.next34, %.lr.ph ], [ 0, %0 ] + %indvar.next34 = add i64 %indvar33, 1 + %exitcond40 = icmp eq i64 %indvar.next34, %tmp39 + br i1 %exitcond40, label %.loopexit, label %.lr.ph + +; <label>:1 ; preds = %.loopexit + ret void +} diff --git a/polly/test/CodeGen/20110312-Fail-without-basicaa.ll b/polly/test/CodeGen/20110312-Fail-without-basicaa.ll new file mode 100644 index 00000000000..08bca383138 --- /dev/null +++ b/polly/test/CodeGen/20110312-Fail-without-basicaa.ll @@ -0,0 +1,28 @@ +; ModuleID = 'test1.ll' +; This should be run without alias analysis enabled. +;RUN: opt %loadPolly -polly-independent %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +define i32 @main() nounwind { +entry: + %t.02.reg2mem = alloca float + br label %entry.split + +entry.split: ; preds = %entry + store float 0.000000e+00, float* %t.02.reg2mem + br label %for.body + +for.body: ; preds = %for.body, %entry.split + %j.01 = phi i32 [ 0, %entry.split ], [ %inc3, %for.body ] + %t.02.reload = load float* %t.02.reg2mem + %inc = fadd float %t.02.reload, 1.000000e+00 + %inc3 = add nsw i32 %j.01, 1 + %exitcond = icmp eq i32 %inc3, 5000001 + store float %inc, float* %t.02.reg2mem + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %conv = fptosi float %inc to i32 + ret i32 %conv +} diff --git a/polly/test/CodeGen/OpenMP/add_memref.c b/polly/test/CodeGen/OpenMP/add_memref.c new file mode 100644 index 
00000000000..02609a3b09a --- /dev/null +++ b/polly/test/CodeGen/OpenMP/add_memref.c @@ -0,0 +1,10 @@ +#define N 10 + +void foo() { + float A[N]; + + for (int i=0; i < N; i++) + A[i] = 10; + + return; +} diff --git a/polly/test/CodeGen/OpenMP/add_memref.ll b/polly/test/CodeGen/OpenMP/add_memref.ll new file mode 100644 index 00000000000..49d9715977f --- /dev/null +++ b/polly/test/CodeGen/OpenMP/add_memref.ll @@ -0,0 +1,29 @@ +; ModuleID = 'add_memref.s' +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-openmp -disable-verify -S < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +define void @foo() nounwind { +entry: + %A = alloca [10 x float], align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds [10 x float]* %A, i32 0, i32 %i.0 + store float 1.000000e+01, float* %arrayidx + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; CHECK: store [10 x float]* %A, [10 x float]** diff --git a/polly/test/CodeGen/OpenMP/extract_memref.c b/polly/test/CodeGen/OpenMP/extract_memref.c new file mode 100644 index 00000000000..fa2e63edb93 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/extract_memref.c @@ -0,0 +1,17 @@ +#define N 10 + +void foo() { + float A[N]; + int i = 0; + + for (i=0; i < N; i++) + A[i] = 10; + + return; +} + + +int main() +{ + foo(); +} diff --git a/polly/test/CodeGen/OpenMP/extract_memref.ll b/polly/test/CodeGen/OpenMP/extract_memref.ll new file mode 100644 index 00000000000..797ea0f0490 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/extract_memref.ll @@ -0,0 +1,37 @@ +; ModuleID = 
'extract_memref.s' +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +define void @foo() nounwind { +entry: + %A = alloca [10 x float], align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds [10 x float]* %A, i32 0, i32 %i.0 + store float 1.000000e+01, float* %arrayidx + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define i32 @main() nounwind { +entry: + call void @foo() + ret i32 0 +} +; CHECK: store [10 x float]* %A, [10 x float]** +; CHECK: getelementptr inbounds %foo.omp_subfn.omp.userContext* %omp.userContext1 +; CHECK: load [10 x float]** +; CHECK: getelementptr inbounds [10 x float]* diff --git a/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.c b/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.c new file mode 100644 index 00000000000..4054d059907 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.c @@ -0,0 +1,11 @@ +#define N 500000 +float A[N]; +int main() { + int j, k; + + for(k = 0; k < N; k++) + for (j = 0; j <= N; j++) + A[j] = k; + + return 0; +} diff --git a/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.ll b/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.ll new file mode 100644 index 00000000000..86d5f900dba --- /dev/null +++ b/polly/test/CodeGen/OpenMP/invalidate_subfn_scops.ll @@ -0,0 +1,38 @@ +; ModuleID = 'test.ll' +; RUN: opt %loadPolly %defaultOpts -basicaa -polly-codegen -enable-polly-openmp < %s | not FileCheck %s +target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@A = common global [500000 x float] zeroinitializer, align 4 + +define i32 @main() nounwind { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc8, %entry.split + %0 = phi i32 [ 0, %entry.split ], [ %inc10, %for.inc8 ] + br label %for.body4 + +for.body4: ; preds = %for.body4, %for.cond1.preheader + %j.01 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ] + %arrayidx = getelementptr [500000 x float]* @A, i32 0, i32 %j.01 + %conv = sitofp i32 %0 to float + store float %conv, float* %arrayidx, align 4 + %inc = add nsw i32 %j.01, 1 + %exitcond = icmp eq i32 %inc, 500001 + br i1 %exitcond, label %for.inc8, label %for.body4 + +for.inc8: ; preds = %for.body4 + %inc10 = add nsw i32 %0, 1 + %exitcond3 = icmp eq i32 %inc10, 500000 + br i1 %exitcond3, label %for.end11, label %for.cond1.preheader + +for.end11: ; preds = %for.inc8 + ret i32 0 +} + + +; CHECK: define internal void @main.omp_subfn.omp_subfn diff --git a/polly/test/CodeGen/OpenMP/parallel_loop.c b/polly/test/CodeGen/OpenMP/parallel_loop.c new file mode 100644 index 00000000000..c776af39e4e --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop.c @@ -0,0 +1,21 @@ +#define M 1024 +#define N 1024 +#define K 1024 + +float A[M][K], B[K][N], C[M][N], X[K]; + +float parallel_loop() { + int i, j, k; + + for (i = 0; i < M; i++) + for (j = 0; j< N; j++) + for (k = 0; k < K; k++) + C[i][j] += A[i][k] * B[k][j]; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + X[k] += X[k]; + + return C[42][42] + X[42]; +} diff --git a/polly/test/CodeGen/OpenMP/parallel_loop.ll b/polly/test/CodeGen/OpenMP/parallel_loop.ll new file mode 100644 index 00000000000..be3b4c74125 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop.ll @@ -0,0 
+1,186 @@ +; ModuleID = 'parallel_loop.s' +; RUN: opt %loadPolly %defaultOpts -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-postfix=tiled -polly-import-jscop-dir=`dirname %s` -polly-cloog -polly-codegen -enable-polly-openmp -analyze -disable-polly-legality < %s | FileCheck -check-prefix=TILED %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x [1024 x float]] zeroinitializer, align 16 +@B = common global [1024 x [1024 x float]] zeroinitializer, align 16 +@C = common global [1024 x [1024 x float]] zeroinitializer, align 16 +@X = common global [1024 x float] zeroinitializer, align 16 + +define float @parallel_loop() nounwind { +bb: + br label %bb18 + +bb18: ; preds = %bb32, %bb + %indvar9 = phi i64 [ %indvar.next10, %bb32 ], [ 0, %bb ] + %exitcond15 = icmp ne i64 %indvar9, 1024 + br i1 %exitcond15, label %bb19, label %bb33 + +bb19: ; preds = %bb18 + br label %bb20 + +bb20: ; preds = %bb30, %bb19 + %indvar6 = phi i64 [ %indvar.next7, %bb30 ], [ 0, %bb19 ] + %scevgep14 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar9, i64 %indvar6 + %exitcond12 = icmp ne i64 %indvar6, 1024 + br i1 %exitcond12, label %bb21, label %bb31 + +bb21: ; preds = %bb20 + br label %bb22 + +bb22: ; preds = %bb28, %bb21 + %indvar3 = phi i64 [ %indvar.next4, %bb28 ], [ 0, %bb21 ] + %scevgep11 = getelementptr [1024 x 
[1024 x float]]* @A, i64 0, i64 %indvar9, i64 %indvar3 + %scevgep8 = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar3, i64 %indvar6 + %exitcond5 = icmp ne i64 %indvar3, 1024 + br i1 %exitcond5, label %bb23, label %bb29 + +bb23: ; preds = %bb22 + %tmp = load float* %scevgep11, align 4 + %tmp24 = load float* %scevgep8, align 4 + %tmp25 = fmul float %tmp, %tmp24 + %tmp26 = load float* %scevgep14, align 4 + %tmp27 = fadd float %tmp26, %tmp25 + store float %tmp27, float* %scevgep14, align 4 + br label %bb28 + +bb28: ; preds = %bb23 + %indvar.next4 = add i64 %indvar3, 1 + br label %bb22 + +bb29: ; preds = %bb22 + br label %bb30 + +bb30: ; preds = %bb29 + %indvar.next7 = add i64 %indvar6, 1 + br label %bb20 + +bb31: ; preds = %bb20 + br label %bb32 + +bb32: ; preds = %bb31 + %indvar.next10 = add i64 %indvar9, 1 + br label %bb18 + +bb33: ; preds = %bb18 + br label %bb34 + +bb34: ; preds = %bb48, %bb33 + %i.1 = phi i32 [ 0, %bb33 ], [ %tmp49, %bb48 ] + %exitcond2 = icmp ne i32 %i.1, 1024 + br i1 %exitcond2, label %bb35, label %bb50 + +bb35: ; preds = %bb34 + br label %bb36 + +bb36: ; preds = %bb45, %bb35 + %j.1 = phi i32 [ 0, %bb35 ], [ %tmp46, %bb45 ] + %exitcond1 = icmp ne i32 %j.1, 1024 + br i1 %exitcond1, label %bb37, label %bb47 + +bb37: ; preds = %bb36 + br label %bb38 + +bb38: ; preds = %bb43, %bb37 + %indvar = phi i64 [ %indvar.next, %bb43 ], [ 0, %bb37 ] + %scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %bb39, label %bb44 + +bb39: ; preds = %bb38 + %tmp40 = load float* %scevgep, align 4 + %tmp41 = load float* %scevgep, align 4 + %tmp42 = fadd float %tmp41, %tmp40 + store float %tmp42, float* %scevgep, align 4 + br label %bb43 + +bb43: ; preds = %bb39 + %indvar.next = add i64 %indvar, 1 + br label %bb38 + +bb44: ; preds = %bb38 + br label %bb45 + +bb45: ; preds = %bb44 + %tmp46 = add nsw i32 %j.1, 1 + br label %bb36 + +bb47: ; preds = %bb36 + br label %bb48 + +bb48: ; 
preds = %bb47 + %tmp49 = add nsw i32 %i.1, 1 + br label %bb34 + +bb50: ; preds = %bb34 + %tmp51 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @C, i64 0, i64 42, i64 42), align 8 + %tmp52 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8 + %tmp53 = fadd float %tmp51, %tmp52 + ret float %tmp53 +} + +; CHECK: for (c2=0;c2<=1023;c2++) { +; CHECK: for (c4=0;c4<=1023;c4++) { +; CHECK: for (c6=0;c6<=1023;c6++) { +; CHECK: Stmt_bb23(c2,c4,c6); +; CHECK: } +; CHECK: } +; CHECK: } +; CHECK: for (c2=0;c2<=1023;c2++) { +; CHECK: for (c4=0;c4<=1023;c4++) { +; CHECK: for (c6=0;c6<=1023;c6++) { +; CHECK: Stmt_bb39(c2,c4,c6); +; CHECK: } +; CHECK: } +; CHECK: } +; CHECK: Parallel loop with iterator 'c2' generated +; CHECK: Parallel loop with iterator 'c6' generated +; CHECK-NOT: Parallel loop + + +; IMPORT: for (c2=0;c2<=1023;c2++) { +; IMPORT: for (c4=0;c4<=1023;c4++) { +; IMPORT: for (c6=0;c6<=1023;c6++) { +; IMPORT: Stmt_bb23(c2,c4,c6); +; IMPORT: Stmt_bb39(c2,c4,c6); +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT-NOT: Parallel loop + +; TILED: for (c2=0;c2<=1023;c2+=4) { +; TILED: for (c4=0;c4<=1023;c4+=4) { +; TILED: for (c6=0;c6<=1023;c6+=4) { +; TILED: for (c8=c2;c8<=c2+3;c8++) { +; TILED: for (c9=c4;c9<=c4+3;c9++) { +; TILED: for (c10=c6;c10<=c6+3;c10++) { +; TILED: Stmt_bb23(c8,c9,c10); +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; TILED: for (c2=0;c2<=1023;c2+=4) { +; TILED: for (c4=0;c4<=1023;c4+=4) { +; TILED: for (c6=0;c6<=1023;c6+=4) { +; TILED: for (c8=c2;c8<=c2+3;c8++) { +; TILED: for (c9=c4;c9<=c4+3;c9++) { +; TILED: for (c10=c6;c10<=c6+3;c10++) { +; TILED: Stmt_bb39(c8,c9,c10); +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; TILED: } +; I am not sure if we actually may have parallel loops here. The dependency +; analysis does not detect any. This may however be because we do not +; correctly update the imported schedule. 
Add a check that hopefully fails +; after this is corrected. Or someone proves there are no parallel loops and +; we can remove this comment. +; TILED-NOT: Parallel loop diff --git a/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop b/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop new file mode 100644 index 00000000000..b4ce979a583 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop @@ -0,0 +1,42 @@ +{ + "name": "bb18 => bb50", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_bb23", + "domain": "{ Stmt_bb23[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }", + "schedule": "{ Stmt_bb23[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_A[1024i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_B[i1 + 1024i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }, + { + "kind": "write", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }] + }, + { + "name": "Stmt_bb39", + "domain": "{ Stmt_bb39[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }", + "schedule": "{ Stmt_bb39[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }] + }] +} diff --git a/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop.tiled b/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop.tiled new file mode 100644 index 00000000000..23c16993362 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop___%bb18---%bb50.jscop.tiled @@ -0,0 +1,42 @@ +{ + "name": "bb18 => bb50", + 
"context": "{ [] }", + "statements": [{ + "name": "Stmt_bb23", + "domain": "{ Stmt_bb23[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }", + "schedule": "{ Stmt_bb23[i0, i1, i2] -> scattering[0, o0, 0, o1, 0, o2, 0, i0, i1, i2] : (exists e0, e1, e2: 4e0 = o0 and 4e1 = o1 and 4e2 = o2 and o0 <= i0 <= 3 + o0 and o1 <= i1 <= 3 + o1 and o2 <= i2 <= 3 + o2)}", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_A[1024i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_B[i1 + 1024i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }, + { + "kind": "write", + "relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }] + }, + { + "name": "Stmt_bb39", + "domain": "{ Stmt_bb39[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }", + "schedule": "{ Stmt_bb39[i0, i1, i2] -> scattering[1, o0, 0, o1, 0, o2, 0, i0, i1, i2] : (exists e0, e1, e2: 4e0 = o0 and 4e1 = o1 and 4e2 = o2 and o0 <= i0 <= 3 + o0 and o1 <= i1 <= 3 + o1 and o2 <= i2 <= 3 + o2)}", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }" + }] + }] +} diff --git a/polly/test/CodeGen/OpenMP/parallel_loop_simple.c b/polly/test/CodeGen/OpenMP/parallel_loop_simple.c new file mode 100644 index 00000000000..1c382fc992e --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop_simple.c @@ -0,0 +1,15 @@ +#define M 1024 +#define N 1024 +#define K 1024 + +float X[K]; + +float parallel_loop_simple() { + int i, k; + + for (i = 0; i < M; i++) + for (k = 0; k < K; k++) + X[k] += X[k]; + + return X[42]; +} diff --git a/polly/test/CodeGen/OpenMP/parallel_loop_simple.ll 
b/polly/test/CodeGen/OpenMP/parallel_loop_simple.ll new file mode 100644 index 00000000000..a22c73193f9 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop_simple.ll @@ -0,0 +1,56 @@ +; ModuleID = 'parallel_loop_simple.s' +; RUN: opt %loadPolly %defaultOpts -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@X = common global [1024 x float] zeroinitializer, align 16 + +define float @parallel_loop_simple() nounwind { +bb: + br label %bb2 + +bb2: ; preds = %bb10, %bb + %i.0 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %bb3, label %bb12 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb8, %bb3 + %indvar = phi i64 [ %indvar.next, %bb8 ], [ 0, %bb3 ] + %scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %bb5, label %bb9 + +bb5: ; preds = %bb4 + %tmp = load float* %scevgep, align 4 + %tmp6 = load float* %scevgep, align 4 + %tmp7 = fadd float %tmp6, %tmp + store float %tmp7, float* %scevgep, align 4 + br label %bb8 + +bb8: ; preds = %bb5 + %indvar.next = add i64 %indvar, 1 + br label %bb4 + +bb9: ; preds = %bb4 + br label %bb10 + +bb10: ; preds = %bb9 + %tmp11 = add nsw i32 %i.0, 1 + br label %bb2 + +bb12: ; preds = %bb2 + %tmp13 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8 + ret float %tmp13 +} + +; CHECK: for (c2=0;c2<=1023;c2++) { +; CHECK: for (c4=0;c4<=1023;c4++) { +; CHECK: Stmt_bb5(c2,c4); +; CHECK: } +; CHECK: } +; CHECK: Parallel loop with iterator 'c4' generated +; CHECK-NOT: Parallel loop + diff --git a/polly/test/CodeGen/OpenMP/parallel_loop_simple2.c b/polly/test/CodeGen/OpenMP/parallel_loop_simple2.c new file mode 100644 index 
00000000000..2fb50ebc2c8 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop_simple2.c @@ -0,0 +1,15 @@ +#define N 1024 + +float C[N], X[N]; + +float parallel_loop_simple2() { + int j; + + for (j = 0; j < N; j++) + C[j] = j; + + for (j = 0; j < N; j++) + X[j] += X[j]; + + return C[42] + X[42]; +} diff --git a/polly/test/CodeGen/OpenMP/parallel_loop_simple2.ll b/polly/test/CodeGen/OpenMP/parallel_loop_simple2.ll new file mode 100644 index 00000000000..30004b01872 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/parallel_loop_simple2.ll @@ -0,0 +1,64 @@ +; ModuleID = 'parallel_loop_simple2.s' +; RUN: opt %loadPolly %defaultOpts -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@C = common global [1024 x float] zeroinitializer, align 16 +@X = common global [1024 x float] zeroinitializer, align 16 + +define float @parallel_loop_simple2() nounwind { +bb: + br label %bb5 + +bb5: ; preds = %bb7, %bb + %indvar1 = phi i64 [ %indvar.next2, %bb7 ], [ 0, %bb ] + %scevgep4 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar1 + %j.0 = trunc i64 %indvar1 to i32 + %exitcond3 = icmp ne i64 %indvar1, 1024 + br i1 %exitcond3, label %bb6, label %bb8 + +bb6: ; preds = %bb5 + %tmp = sitofp i32 %j.0 to float + store float %tmp, float* %scevgep4, align 4 + br label %bb7 + +bb7: ; preds = %bb6 + %indvar.next2 = add i64 %indvar1, 1 + br label %bb5 + +bb8: ; preds = %bb5 + br label %bb9 + +bb9: ; preds = %bb14, %bb8 + %indvar = phi i64 [ %indvar.next, %bb14 ], [ 0, %bb8 ] + %scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %bb10, label %bb15 + +bb10: ; preds = %bb9 + %tmp11 = load float* %scevgep, align 4 + %tmp12 = load float* %scevgep, align 4 + %tmp13 = fadd float 
%tmp12, %tmp11 + store float %tmp13, float* %scevgep, align 4 + br label %bb14 + +bb14: ; preds = %bb10 + %indvar.next = add i64 %indvar, 1 + br label %bb9 + +bb15: ; preds = %bb9 + %tmp16 = load float* getelementptr inbounds ([1024 x float]* @C, i64 0, i64 42), align 8 + %tmp17 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8 + %tmp18 = fadd float %tmp16, %tmp17 + ret float %tmp18 +} + +; CHECK: for (c2=0;c2<=1023;c2++) { +; CHECK: Stmt_bb6(c2); +; CHECK: } +; CHECK: for (c2=0;c2<=1023;c2++) { +; CHECK: Stmt_bb10(c2); +; CHECK: } +; CHECK: Parallel loop with iterator 'c2' generated +; CHECK: Parallel loop with iterator 'c2' generated +; CHECK-NOT: Parallel loop diff --git a/polly/test/CodeGen/OpenMP/simple_nested_loop.c b/polly/test/CodeGen/OpenMP/simple_nested_loop.c new file mode 100644 index 00000000000..6867d0b45c7 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/simple_nested_loop.c @@ -0,0 +1,22 @@ +#include <string.h> +#define N 10 + +double A[N]; +double B[N]; + +void loop_openmp() { + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[j] += j; + } + } +} + +int main () { + memset(A, 0, sizeof(float) * N); + + loop_openmp(); + + return 0; +} + diff --git a/polly/test/CodeGen/OpenMP/simple_nested_loop.ll b/polly/test/CodeGen/OpenMP/simple_nested_loop.ll new file mode 100644 index 00000000000..d3f27055817 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/simple_nested_loop.ll @@ -0,0 +1,63 @@ +; ModuleID = 'simple_nested_loop.s' +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-openmp -S %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@A = common global [10 x double] zeroinitializer, align 4 +@B = common global [10 x double] zeroinitializer, align 4 + +define void @loop_openmp() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = 
%for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ] + %exitcond1 = icmp ne i32 %i.0, 10 + br i1 %exitcond1, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc, %for.body + %tmp = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %arrayidx = getelementptr [10 x double]* @A, i32 0, i32 %tmp + %exitcond = icmp ne i32 %tmp, 10 + br i1 %exitcond, label %for.body5, label %for.end + +for.body5: ; preds = %for.cond2 + %conv = sitofp i32 %tmp to double + %tmp8 = load double* %arrayidx, align 4 + %add = fadd double %tmp8, %conv + store double %add, double* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body5 + %inc = add nsw i32 %tmp, 1 + br label %for.cond2 + +for.end: ; preds = %for.cond2 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %inc12 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} + +define i32 @main() nounwind { +entry: + call void @llvm.memset.p0i8.i32(i8* bitcast ([10 x double]* @A to i8*), i8 0, i32 40, i32 4, i1 false) + call void @loop_openmp() + ret i32 0 +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +; CHECK: %omp.userContext = alloca %loop_openmp.omp_subfn.omp.userContext +; CHECK: getelementptr inbounds %loop_openmp.omp_subfn.omp.userContext* %omp.userContext +; CHECK: %omp_data = bitcast %loop_openmp.omp_subfn.omp.userContext* %omp.userContext to i8* +; CHECK: @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.omp_subfn, i8* %omp_data +; CHECK: call void @loop_openmp.omp_subfn(i8* %omp_data) +; CHECK: %omp.userContext1 = bitcast i8* %omp.userContext to %loop_openmp.omp_subfn.omp.userContext* diff --git a/polly/test/CodeGen/OpenMP/structnames.c b/polly/test/CodeGen/OpenMP/structnames.c new file mode 100644 index 00000000000..1cdb2e321f0 --- /dev/null +++ b/polly/test/CodeGen/OpenMP/structnames.c @@ -0,0 +1,26 @@ +#include <string.h> +#include 
<stdio.h> +#define N 5 + +float A[N]; +float B[N]; + +void loop1_openmp() { + for (int i = 0; i <= N; i++) + A[i] = 0; + + for (int j = 0; j <= N; j++) + for (int k = 0; k <= N; k++) + B[k] += j; +} + +int main () { + int i; + memset(A, 0, sizeof(float) * N); + memset(B, 0, sizeof(float) * N); + + loop1_openmp(); + + return 0; +} + diff --git a/polly/test/CodeGen/OpenMP/structnames.ll b/polly/test/CodeGen/OpenMP/structnames.ll new file mode 100644 index 00000000000..62170b7abce --- /dev/null +++ b/polly/test/CodeGen/OpenMP/structnames.ll @@ -0,0 +1,76 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-openmp -S %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@A = common global [5 x float] zeroinitializer, align 4 +@B = common global [5 x float] zeroinitializer, align 4 + +define void @loop1_openmp() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %arrayidx = getelementptr [5 x float]* @A, i32 0, i32 %i.0 + %exitcond2 = icmp ne i32 %i.0, 6 + br i1 %exitcond2, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store float 0.000000e+00, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond4 + +for.cond4: ; preds = %for.inc21, %for.end + %tmp = phi i32 [ 0, %for.end ], [ %inc23, %for.inc21 ] + %exitcond1 = icmp ne i32 %tmp, 6 + br i1 %exitcond1, label %for.body7, label %for.end24 + +for.body7: ; preds = %for.cond4 + br label %for.cond9 + +for.cond9: ; preds = %for.inc17, %for.body7 + %k.0 = phi i32 [ 0, %for.body7 ], [ %inc19, %for.inc17 ] + %arrayidx15 = getelementptr [5 x float]* @B, i32 0, i32 %k.0 + %exitcond = icmp ne i32 %k.0, 6 + br i1 %exitcond, label %for.body12, 
label %for.end20 + +for.body12: ; preds = %for.cond9 + %conv = sitofp i32 %tmp to float + %tmp16 = load float* %arrayidx15, align 4 + %add = fadd float %tmp16, %conv + store float %add, float* %arrayidx15, align 4 + br label %for.inc17 + +for.inc17: ; preds = %for.body12 + %inc19 = add nsw i32 %k.0, 1 + br label %for.cond9 + +for.end20: ; preds = %for.cond9 + br label %for.inc21 + +for.inc21: ; preds = %for.end20 + %inc23 = add nsw i32 %tmp, 1 + br label %for.cond4 + +for.end24: ; preds = %for.cond4 + ret void +} + +define i32 @main() nounwind { +entry: + call void @llvm.memset.p0i8.i32(i8* bitcast ([5 x float]* @A to i8*), i8 0, i32 20, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([5 x float]* @B to i8*), i8 0, i32 20, i32 4, i1 false) + call void @loop1_openmp() + ret i32 0 +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +; CHECK: loop1_openmp.omp_subfn.omp.userContext +; CHECK: loop1_openmp.omp_subfn1.omp.userContext diff --git a/polly/test/CodeGen/OpenMP/two_loop.c b/polly/test/CodeGen/OpenMP/two_loop.c new file mode 100644 index 00000000000..984e48f1f2c --- /dev/null +++ b/polly/test/CodeGen/OpenMP/two_loop.c @@ -0,0 +1,24 @@ +#include <string.h> +#define N 10240000 + +float A[N]; +float B[N]; + +void loop1_openmp() { + for (int i = 0; i <= N; i++) + A[i] = 0; + for (int j = 0; j <= N; j++) + B[j] = 0; +} + + +int main () { + int i; + memset(A, 0, sizeof(float) * N); + memset(B, 1, sizeof(float) * N); + + loop1_openmp(); + + return 0; +} + diff --git a/polly/test/CodeGen/OpenMP/two_loop.ll b/polly/test/CodeGen/OpenMP/two_loop.ll new file mode 100644 index 00000000000..cb95178491a --- /dev/null +++ b/polly/test/CodeGen/OpenMP/two_loop.ll @@ -0,0 +1,56 @@ +; ModuleID = 'two_loop.s' +; RUN: opt %loadPolly %defaultOpts -mem2reg -polly-codegen -enable-polly-openmp -S < %s +target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +@A = common global [10240000 x float] zeroinitializer, align 4 +@B = common global [10240000 x float] zeroinitializer, align 4 + +define void @loop1_openmp() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %arrayidx = getelementptr [10240000 x float]* @A, i32 0, i32 %i.0 + %exitcond1 = icmp ne i32 %i.0, 10240001 + br i1 %exitcond1, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store float 0.000000e+00, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond4 + +for.cond4: ; preds = %for.inc10, %for.end + %j.0 = phi i32 [ 0, %for.end ], [ %inc12, %for.inc10 ] + %arrayidx9 = getelementptr [10240000 x float]* @B, i32 0, i32 %j.0 + %exitcond = icmp ne i32 %j.0, 10240001 + br i1 %exitcond, label %for.body7, label %for.end13 + +for.body7: ; preds = %for.cond4 + store float 0.000000e+00, float* %arrayidx9, align 4 + br label %for.inc10 + +for.inc10: ; preds = %for.body7 + %inc12 = add nsw i32 %j.0, 1 + br label %for.cond4 + +for.end13: ; preds = %for.cond4 + ret void +} + +define i32 @main() nounwind { +entry: + call void @llvm.memset.p0i8.i32(i8* bitcast ([10240000 x float]* @A to i8*), i8 0, i32 40960000, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([10240000 x float]* @B to i8*), i8 1, i32 40960000, i32 4, i1 false) + call void @loop1_openmp() + ret i32 0 +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/polly/test/CodeGen/PHIInExit.ll b/polly/test/CodeGen/PHIInExit.ll new file mode 100644 index 00000000000..93cb9e685d5 --- /dev/null +++ b/polly/test/CodeGen/PHIInExit.ll @@ -0,0 +1,77 @@ +; RUN: opt %loadPolly 
%defaultOpts -polly-codegen < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t } +%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* } +%union.pthread_attr_t = type { i64, [12 x i32] } +%union.pthread_mutex_t = type { %struct..0__pthread_mutex_s } +%union.pthread_mutexattr_t = type { i32 } + +@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0] +@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0] +@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0] +@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0] +@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0] +@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0] +@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0] +@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0] +@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init ; <i32 (%union.pthread_mutex_t*, 
%union.pthread_mutexattr_t*)*> [#uses=0] +@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0] +@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0] +@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init ; <i32 (%union.pthread_mutexattr_t*)*> [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype ; <i32 (%union.pthread_mutexattr_t*, i32)*> [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy ; <i32 (%union.pthread_mutexattr_t*)*> [#uses=0] + +define void @_ZL6createP6node_tii3v_tS1_d() { +entry: + br i1 undef, label %bb, label %bb5 + +bb: ; preds = %entry + br i1 false, label %bb1, label %bb3 + +bb1: ; preds = %bb + br label %bb3 + +bb3: ; preds = %bb1, %bb + %iftmp.99.0 = phi i64 [ undef, %bb1 ], [ 1, %bb ] ; <i64> [#uses=0] + br label %bb5 + +bb5: ; preds = %bb3, %entry + br i1 undef, label %return, label %bb7 + +bb7: ; preds = %bb5 + unreachable + +return: ; preds = %bb5 + ret void +} + +declare i32 @pthread_once(i32*, void ()*) + +declare i8* @pthread_getspecific(i32) + +declare i32 @pthread_setspecific(i32, i8*) + +declare i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) + +declare i32 @pthread_cancel(i64) + +declare i32 @pthread_mutex_lock(%union.pthread_mutex_t*) + +declare i32 @pthread_mutex_trylock(%union.pthread_mutex_t*) + +declare i32 @pthread_mutex_unlock(%union.pthread_mutex_t*) + +declare i32 @pthread_mutex_init(%union.pthread_mutex_t*, %union.pthread_mutexattr_t*) + +declare i32 @pthread_key_create(i32*, void (i8*)*) + +declare i32 @pthread_key_delete(i32) + +declare i32 @pthread_mutexattr_init(%union.pthread_mutexattr_t*) + +declare 
i32 @pthread_mutexattr_settype(%union.pthread_mutexattr_t*, i32) + +declare i32 @pthread_mutexattr_destroy(%union.pthread_mutexattr_t*) diff --git a/polly/test/CodeGen/constant_condition.c b/polly/test/CodeGen/constant_condition.c new file mode 100644 index 00000000000..171927c6fa2 --- /dev/null +++ b/polly/test/CodeGen/constant_condition.c @@ -0,0 +1,23 @@ +#include <string.h> +int A[1]; + +void constant_condition () { + int a = 0; + int b = 0; + + if (a == b) + A[0] = 0; + else + A[0] = 1; +} + +int main () { + int i; + + A[0] = 2; + + constant_condition(); + + return A[0]; +} + diff --git a/polly/test/CodeGen/constant_condition.ll b/polly/test/CodeGen/constant_condition.ll new file mode 100644 index 00000000000..84265cbd105 --- /dev/null +++ b/polly/test/CodeGen/constant_condition.ll @@ -0,0 +1,34 @@ +;RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +@A = common global [1 x i32] zeroinitializer, align 4 ; <[1 x i32]*> [#uses=1] + +define void @constant_condition() nounwind { +bb: + %tmp = icmp eq i32 0, 0 ; <i1> [#uses=0] + br i1 true, label %bb1, label %bb2 + +bb1: ; preds = %bb + store i32 0, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0) + br label %bb3 + +bb2: ; preds = %bb + store i32 1, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0) + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +bb: + store i32 2, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0) + call void @constant_condition() + %tmp = load i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1] + ret i32 %tmp +} + + +; CHECK: Stmt_bb1(); diff --git a/polly/test/CodeGen/do_pluto_matmult.c 
b/polly/test/CodeGen/do_pluto_matmult.c new file mode 100644 index 00000000000..d5509698e5b --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult.c @@ -0,0 +1,74 @@ +#define M 36 +#define N 36 +#define K 36 +#define alpha 1 +#define beta 1 +double A[M][K+13]; +double B[K][N+13]; +double C[M][N+13]; + +#include <stdio.h> + +void init_array() +{ + int i, j; + + for (i=0; i<N; i++) { + for (j=0; j<N; j++) { + A[i][j] = (i + j); + // We do not want to optimize this. + __sync_synchronize(); + B[i][j] = (double)(i*j); + C[i][j] = 0.0; + } + } +} + + +void print_array() +{ + int i, j; + + for (i=0; i<N; i++) { + for (j=0; j<N; j++) { + fprintf(stdout, "%lf ", C[i][j]); + if (j%80 == 79) fprintf(stdout, "\n"); + } + fprintf(stdout, "\n"); + } +} + + +void do_pluto_matmult(void) { + int i, j, k; + + __sync_synchronize(); + i = 0; + do { + j = 0; + do { + k = 0; + do { + C[i][j] = beta*C[i][j] + alpha*A[i][k] * B[k][j]; + ++k; + } while (k < K); + ++j; + } while (j < N); + ++i; + } while (i < M); + __sync_synchronize(); +} + +int main() +{ + register double s; + + init_array(); + +#pragma scop + do_pluto_matmult(); +#pragma endscop + print_array(); + + return 0; +} diff --git a/polly/test/CodeGen/do_pluto_matmult.ll b/polly/test/CodeGen/do_pluto_matmult.ll new file mode 100644 index 00000000000..970134fec55 --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult.ll @@ -0,0 +1,231 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen -disable-output < %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -analyze < %s | FileCheck -check-prefix=IMPORT %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-import-jscop-postfix=valid_reverse -polly-cloog -analyze < %s | FileCheck -check-prefix=REVERSE %s > /dev/null +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop 
-polly-import-jscop-dir=`dirname %s` -polly-import-jscop-postfix=invalid_reverse -polly-cloog -analyze < %s 2>&1 | FileCheck -check-prefix=INVALID %s > /dev/null +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -analyze < %s | FileCheck -check-prefix=IMPORT %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-codegen < %s | lli | diff %s.result - +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-codegen -S < %s | FileCheck -check-prefix=CODEGEN %s + + +; ModuleID = 'do_pluto_matmult.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=3] +@B = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=3] +@C = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=4] +@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=3] +@.str = private constant [5 x i8] c"%lf \00" ; <[5 x i8]*> [#uses=1] +@.str1 = private constant [2 x i8] c"\0A\00" ; <[2 x i8]*> [#uses=1] + +define void @init_array() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc29, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc29 ], [ 0, %entry ] ; <i64> [#uses=7] + %exitcond6 = icmp ne i64 %indvar1, 36 ; <i1> [#uses=1] + br i1 %exitcond6, label %for.body, label %for.end32 + +for.body: ; preds 
= %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ] ; <i64> [#uses=7] + %tmp7 = add i64 %indvar1, %indvar ; <i64> [#uses=1] + %add = trunc i64 %tmp7 to i32 ; <i32> [#uses=1] + %arrayidx10 = getelementptr [36 x [49 x double]]* @A, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1] + %tmp9 = mul i64 %indvar1, %indvar ; <i64> [#uses=1] + %mul = trunc i64 %tmp9 to i32 ; <i32> [#uses=1] + %arrayidx20 = getelementptr [36 x [49 x double]]* @B, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1] + %arrayidx27 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1] + %exitcond = icmp ne i64 %indvar, 36 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body4, label %for.end + +for.body4: ; preds = %for.cond1 + %conv = sitofp i32 %add to double ; <double> [#uses=1] + store double %conv, double* %arrayidx10 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %conv13 = sitofp i32 %mul to double ; <double> [#uses=1] + store double %conv13, double* %arrayidx20 + store double 0.000000e+00, double* %arrayidx27 + br label %for.inc + +for.inc: ; preds = %for.body4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc29 + +for.inc29: ; preds = %for.end + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end32: ; preds = %for.cond + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define void @print_array() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc18, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc18 ], [ 0, %entry ] ; <i64> [#uses=3] + %exitcond3 = icmp ne i64 %indvar1, 36 ; <i1> [#uses=1] + br i1 %exitcond3, label %for.body, label %for.end21 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvar = 
phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ] ; <i64> [#uses=3] + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] ; <i32> [#uses=2] + %arrayidx9 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1] + %exitcond = icmp ne i64 %indvar, 36 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body4, label %for.end + +for.body4: ; preds = %for.cond1 + %tmp5 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1] + %tmp10 = load double* %arrayidx9 ; <double> [#uses=1] + %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %tmp10) ; <i32> [#uses=0] + %cmp12 = icmp eq i32 %j.0, 79 ; <i1> [#uses=1] + br i1 %cmp12, label %if.then, label %if.end + +if.then: ; preds = %for.body4 + %tmp13 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1] + %call14 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp13, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) ; <i32> [#uses=0] + br label %if.end + +if.end: ; preds = %if.then, %for.body4 + br label %for.inc + +for.inc: ; preds = %if.end + %inc = add nsw i32 %j.0, 1 ; <i32> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %tmp16 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1] + %call17 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp16, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) ; <i32> [#uses=0] + br label %for.inc18 + +for.inc18: ; preds = %for.end + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end21: ; preds = %for.cond + ret void +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) 
+ +define void @do_pluto_matmult() nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %do.body + +do.body: ; preds = %do.cond42, %entry + %indvar3 = phi i64 [ %indvar.next4, %do.cond42 ], [ 0, %entry ] ; <i64> [#uses=3] + br label %do.body1 + +do.body1: ; preds = %do.cond36, %do.body + %indvar1 = phi i64 [ %indvar.next2, %do.cond36 ], [ 0, %do.body ] ; <i64> [#uses=3] + %arrayidx5 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar3, i64 %indvar1 ; <double*> [#uses=2] + br label %do.body2 + +do.body2: ; preds = %do.cond, %do.body1 + %indvar = phi i64 [ %indvar.next, %do.cond ], [ 0, %do.body1 ] ; <i64> [#uses=3] + %arrayidx13 = getelementptr [36 x [49 x double]]* @A, i64 0, i64 %indvar3, i64 %indvar ; <double*> [#uses=1] + %arrayidx22 = getelementptr [36 x [49 x double]]* @B, i64 0, i64 %indvar, i64 %indvar1 ; <double*> [#uses=1] + %tmp6 = load double* %arrayidx5 ; <double> [#uses=1] + %mul = fmul double 1.000000e+00, %tmp6 ; <double> [#uses=1] + %tmp14 = load double* %arrayidx13 ; <double> [#uses=1] + %mul15 = fmul double 1.000000e+00, %tmp14 ; <double> [#uses=1] + %tmp23 = load double* %arrayidx22 ; <double> [#uses=1] + %mul24 = fmul double %mul15, %tmp23 ; <double> [#uses=1] + %add = fadd double %mul, %mul24 ; <double> [#uses=1] + store double %add, double* %arrayidx5 + br label %do.cond + +do.cond: ; preds = %do.body2 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp ne i64 %indvar.next, 36 ; <i1> [#uses=1] + br i1 %exitcond, label %do.body2, label %do.end + +do.end: ; preds = %do.cond + br label %do.cond36 + +do.cond36: ; preds = %do.end + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=2] + %exitcond5 = icmp ne i64 %indvar.next2, 36 ; <i1> [#uses=1] + br i1 %exitcond5, label %do.body1, label %do.end39 + +do.end39: ; preds = %do.cond36 + br label %do.cond42 + +do.cond42: ; preds = %do.end39 + %indvar.next4 = add i64 %indvar3, 1 ; <i64> [#uses=2] + %exitcond6 = icmp ne 
i64 %indvar.next4, 36 ; <i1> [#uses=1] + br i1 %exitcond6, label %do.body, label %do.end45 + +do.end45: ; preds = %do.cond42 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +define i32 @main() nounwind { +entry: + call void @init_array() + call void @do_pluto_matmult() + call void @print_array() + ret i32 0 +} + +; CHECK: for (c2=0;c2<=35;c2++) { +; CHECK: for (c4=0;c4<=35;c4++) { +; CHECK: for (c6=0;c6<=35;c6++) { +; CHECK: Stmt_do_body2(c2,c4,c6); +; CHECK: } +; CHECK: } +; CHECK: } + + +; Do not dump the complete CLooG output. Newer CLooG versions optimize more +; in this test case. +; IMPORT: for (c2=0;c2<=35;c2+=4) { +; IMPORT: c3<=min(35,c2+3);c3++) { +; IMPORT: for (c6=0;c6<=35;c6+=4) { +; IMPORT: c7<=min(35,c6+3);c7++) { +; IMPORT: for (c10=0;c10<=35;c10+=4) { +; IMPORT: c11<=min(35,c10+3);c11++) +; IMPORT: { +; IMPORT: Stmt_do_body2(c3,c7,c11); +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } + + +; CODEGEN: polly.stmt_do.body2 + +; REVERSE: for (c2=-35;c2<=0;c2++) { +; REVERSE: for (c4=-35;c4<=0;c4++) { +; REVERSE: for (c6=0;c6<=35;c6++) { +; REVERSE: Stmt_do_body2(-c2,-c4,c6); +; REVERSE: } +; REVERSE: } +; REVERSE: } + +; INVALID: file contains a scattering that changes the dependences. 
+ + + diff --git a/polly/test/CodeGen/do_pluto_matmult.ll.result b/polly/test/CodeGen/do_pluto_matmult.ll.result new file mode 100644 index 00000000000..7254e4a9a7d --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult.ll.result @@ -0,0 +1,36 @@ +0.000000 14910.000000 29820.000000 44730.000000 59640.000000 74550.000000 89460.000000 104370.000000 119280.000000 134190.000000 149100.000000 164010.000000 178920.000000 193830.000000 208740.000000 223650.000000 238560.000000 253470.000000 268380.000000 283290.000000 298200.000000 313110.000000 328020.000000 342930.000000 357840.000000 372750.000000 387660.000000 402570.000000 417480.000000 432390.000000 447300.000000 462210.000000 477120.000000 492030.000000 506940.000000 521850.000000 +0.000000 15540.000000 31080.000000 46620.000000 62160.000000 77700.000000 93240.000000 108780.000000 124320.000000 139860.000000 155400.000000 170940.000000 186480.000000 202020.000000 217560.000000 233100.000000 248640.000000 264180.000000 279720.000000 295260.000000 310800.000000 326340.000000 341880.000000 357420.000000 372960.000000 388500.000000 404040.000000 419580.000000 435120.000000 450660.000000 466200.000000 481740.000000 497280.000000 512820.000000 528360.000000 543900.000000 +0.000000 16170.000000 32340.000000 48510.000000 64680.000000 80850.000000 97020.000000 113190.000000 129360.000000 145530.000000 161700.000000 177870.000000 194040.000000 210210.000000 226380.000000 242550.000000 258720.000000 274890.000000 291060.000000 307230.000000 323400.000000 339570.000000 355740.000000 371910.000000 388080.000000 404250.000000 420420.000000 436590.000000 452760.000000 468930.000000 485100.000000 501270.000000 517440.000000 533610.000000 549780.000000 565950.000000 +0.000000 16800.000000 33600.000000 50400.000000 67200.000000 84000.000000 100800.000000 117600.000000 134400.000000 151200.000000 168000.000000 184800.000000 201600.000000 218400.000000 235200.000000 252000.000000 268800.000000 285600.000000 302400.000000 
319200.000000 336000.000000 352800.000000 369600.000000 386400.000000 403200.000000 420000.000000 436800.000000 453600.000000 470400.000000 487200.000000 504000.000000 520800.000000 537600.000000 554400.000000 571200.000000 588000.000000 +0.000000 17430.000000 34860.000000 52290.000000 69720.000000 87150.000000 104580.000000 122010.000000 139440.000000 156870.000000 174300.000000 191730.000000 209160.000000 226590.000000 244020.000000 261450.000000 278880.000000 296310.000000 313740.000000 331170.000000 348600.000000 366030.000000 383460.000000 400890.000000 418320.000000 435750.000000 453180.000000 470610.000000 488040.000000 505470.000000 522900.000000 540330.000000 557760.000000 575190.000000 592620.000000 610050.000000 +0.000000 18060.000000 36120.000000 54180.000000 72240.000000 90300.000000 108360.000000 126420.000000 144480.000000 162540.000000 180600.000000 198660.000000 216720.000000 234780.000000 252840.000000 270900.000000 288960.000000 307020.000000 325080.000000 343140.000000 361200.000000 379260.000000 397320.000000 415380.000000 433440.000000 451500.000000 469560.000000 487620.000000 505680.000000 523740.000000 541800.000000 559860.000000 577920.000000 595980.000000 614040.000000 632100.000000 +0.000000 18690.000000 37380.000000 56070.000000 74760.000000 93450.000000 112140.000000 130830.000000 149520.000000 168210.000000 186900.000000 205590.000000 224280.000000 242970.000000 261660.000000 280350.000000 299040.000000 317730.000000 336420.000000 355110.000000 373800.000000 392490.000000 411180.000000 429870.000000 448560.000000 467250.000000 485940.000000 504630.000000 523320.000000 542010.000000 560700.000000 579390.000000 598080.000000 616770.000000 635460.000000 654150.000000 +0.000000 19320.000000 38640.000000 57960.000000 77280.000000 96600.000000 115920.000000 135240.000000 154560.000000 173880.000000 193200.000000 212520.000000 231840.000000 251160.000000 270480.000000 289800.000000 309120.000000 328440.000000 347760.000000 367080.000000 
386400.000000 405720.000000 425040.000000 444360.000000 463680.000000 483000.000000 502320.000000 521640.000000 540960.000000 560280.000000 579600.000000 598920.000000 618240.000000 637560.000000 656880.000000 676200.000000 +0.000000 19950.000000 39900.000000 59850.000000 79800.000000 99750.000000 119700.000000 139650.000000 159600.000000 179550.000000 199500.000000 219450.000000 239400.000000 259350.000000 279300.000000 299250.000000 319200.000000 339150.000000 359100.000000 379050.000000 399000.000000 418950.000000 438900.000000 458850.000000 478800.000000 498750.000000 518700.000000 538650.000000 558600.000000 578550.000000 598500.000000 618450.000000 638400.000000 658350.000000 678300.000000 698250.000000 +0.000000 20580.000000 41160.000000 61740.000000 82320.000000 102900.000000 123480.000000 144060.000000 164640.000000 185220.000000 205800.000000 226380.000000 246960.000000 267540.000000 288120.000000 308700.000000 329280.000000 349860.000000 370440.000000 391020.000000 411600.000000 432180.000000 452760.000000 473340.000000 493920.000000 514500.000000 535080.000000 555660.000000 576240.000000 596820.000000 617400.000000 637980.000000 658560.000000 679140.000000 699720.000000 720300.000000 +0.000000 21210.000000 42420.000000 63630.000000 84840.000000 106050.000000 127260.000000 148470.000000 169680.000000 190890.000000 212100.000000 233310.000000 254520.000000 275730.000000 296940.000000 318150.000000 339360.000000 360570.000000 381780.000000 402990.000000 424200.000000 445410.000000 466620.000000 487830.000000 509040.000000 530250.000000 551460.000000 572670.000000 593880.000000 615090.000000 636300.000000 657510.000000 678720.000000 699930.000000 721140.000000 742350.000000 +0.000000 21840.000000 43680.000000 65520.000000 87360.000000 109200.000000 131040.000000 152880.000000 174720.000000 196560.000000 218400.000000 240240.000000 262080.000000 283920.000000 305760.000000 327600.000000 349440.000000 371280.000000 393120.000000 414960.000000 436800.000000 
458640.000000 480480.000000 502320.000000 524160.000000 546000.000000 567840.000000 589680.000000 611520.000000 633360.000000 655200.000000 677040.000000 698880.000000 720720.000000 742560.000000 764400.000000 +0.000000 22470.000000 44940.000000 67410.000000 89880.000000 112350.000000 134820.000000 157290.000000 179760.000000 202230.000000 224700.000000 247170.000000 269640.000000 292110.000000 314580.000000 337050.000000 359520.000000 381990.000000 404460.000000 426930.000000 449400.000000 471870.000000 494340.000000 516810.000000 539280.000000 561750.000000 584220.000000 606690.000000 629160.000000 651630.000000 674100.000000 696570.000000 719040.000000 741510.000000 763980.000000 786450.000000 +0.000000 23100.000000 46200.000000 69300.000000 92400.000000 115500.000000 138600.000000 161700.000000 184800.000000 207900.000000 231000.000000 254100.000000 277200.000000 300300.000000 323400.000000 346500.000000 369600.000000 392700.000000 415800.000000 438900.000000 462000.000000 485100.000000 508200.000000 531300.000000 554400.000000 577500.000000 600600.000000 623700.000000 646800.000000 669900.000000 693000.000000 716100.000000 739200.000000 762300.000000 785400.000000 808500.000000 +0.000000 23730.000000 47460.000000 71190.000000 94920.000000 118650.000000 142380.000000 166110.000000 189840.000000 213570.000000 237300.000000 261030.000000 284760.000000 308490.000000 332220.000000 355950.000000 379680.000000 403410.000000 427140.000000 450870.000000 474600.000000 498330.000000 522060.000000 545790.000000 569520.000000 593250.000000 616980.000000 640710.000000 664440.000000 688170.000000 711900.000000 735630.000000 759360.000000 783090.000000 806820.000000 830550.000000 +0.000000 24360.000000 48720.000000 73080.000000 97440.000000 121800.000000 146160.000000 170520.000000 194880.000000 219240.000000 243600.000000 267960.000000 292320.000000 316680.000000 341040.000000 365400.000000 389760.000000 414120.000000 438480.000000 462840.000000 487200.000000 511560.000000 
535920.000000 560280.000000 584640.000000 609000.000000 633360.000000 657720.000000 682080.000000 706440.000000 730800.000000 755160.000000 779520.000000 803880.000000 828240.000000 852600.000000 +0.000000 24990.000000 49980.000000 74970.000000 99960.000000 124950.000000 149940.000000 174930.000000 199920.000000 224910.000000 249900.000000 274890.000000 299880.000000 324870.000000 349860.000000 374850.000000 399840.000000 424830.000000 449820.000000 474810.000000 499800.000000 524790.000000 549780.000000 574770.000000 599760.000000 624750.000000 649740.000000 674730.000000 699720.000000 724710.000000 749700.000000 774690.000000 799680.000000 824670.000000 849660.000000 874650.000000 +0.000000 25620.000000 51240.000000 76860.000000 102480.000000 128100.000000 153720.000000 179340.000000 204960.000000 230580.000000 256200.000000 281820.000000 307440.000000 333060.000000 358680.000000 384300.000000 409920.000000 435540.000000 461160.000000 486780.000000 512400.000000 538020.000000 563640.000000 589260.000000 614880.000000 640500.000000 666120.000000 691740.000000 717360.000000 742980.000000 768600.000000 794220.000000 819840.000000 845460.000000 871080.000000 896700.000000 +0.000000 26250.000000 52500.000000 78750.000000 105000.000000 131250.000000 157500.000000 183750.000000 210000.000000 236250.000000 262500.000000 288750.000000 315000.000000 341250.000000 367500.000000 393750.000000 420000.000000 446250.000000 472500.000000 498750.000000 525000.000000 551250.000000 577500.000000 603750.000000 630000.000000 656250.000000 682500.000000 708750.000000 735000.000000 761250.000000 787500.000000 813750.000000 840000.000000 866250.000000 892500.000000 918750.000000 +0.000000 26880.000000 53760.000000 80640.000000 107520.000000 134400.000000 161280.000000 188160.000000 215040.000000 241920.000000 268800.000000 295680.000000 322560.000000 349440.000000 376320.000000 403200.000000 430080.000000 456960.000000 483840.000000 510720.000000 537600.000000 564480.000000 
591360.000000 618240.000000 645120.000000 672000.000000 698880.000000 725760.000000 752640.000000 779520.000000 806400.000000 833280.000000 860160.000000 887040.000000 913920.000000 940800.000000 +0.000000 27510.000000 55020.000000 82530.000000 110040.000000 137550.000000 165060.000000 192570.000000 220080.000000 247590.000000 275100.000000 302610.000000 330120.000000 357630.000000 385140.000000 412650.000000 440160.000000 467670.000000 495180.000000 522690.000000 550200.000000 577710.000000 605220.000000 632730.000000 660240.000000 687750.000000 715260.000000 742770.000000 770280.000000 797790.000000 825300.000000 852810.000000 880320.000000 907830.000000 935340.000000 962850.000000 +0.000000 28140.000000 56280.000000 84420.000000 112560.000000 140700.000000 168840.000000 196980.000000 225120.000000 253260.000000 281400.000000 309540.000000 337680.000000 365820.000000 393960.000000 422100.000000 450240.000000 478380.000000 506520.000000 534660.000000 562800.000000 590940.000000 619080.000000 647220.000000 675360.000000 703500.000000 731640.000000 759780.000000 787920.000000 816060.000000 844200.000000 872340.000000 900480.000000 928620.000000 956760.000000 984900.000000 +0.000000 28770.000000 57540.000000 86310.000000 115080.000000 143850.000000 172620.000000 201390.000000 230160.000000 258930.000000 287700.000000 316470.000000 345240.000000 374010.000000 402780.000000 431550.000000 460320.000000 489090.000000 517860.000000 546630.000000 575400.000000 604170.000000 632940.000000 661710.000000 690480.000000 719250.000000 748020.000000 776790.000000 805560.000000 834330.000000 863100.000000 891870.000000 920640.000000 949410.000000 978180.000000 1006950.000000 +0.000000 29400.000000 58800.000000 88200.000000 117600.000000 147000.000000 176400.000000 205800.000000 235200.000000 264600.000000 294000.000000 323400.000000 352800.000000 382200.000000 411600.000000 441000.000000 470400.000000 499800.000000 529200.000000 558600.000000 588000.000000 617400.000000 
646800.000000 676200.000000 705600.000000 735000.000000 764400.000000 793800.000000 823200.000000 852600.000000 882000.000000 911400.000000 940800.000000 970200.000000 999600.000000 1029000.000000 +0.000000 30030.000000 60060.000000 90090.000000 120120.000000 150150.000000 180180.000000 210210.000000 240240.000000 270270.000000 300300.000000 330330.000000 360360.000000 390390.000000 420420.000000 450450.000000 480480.000000 510510.000000 540540.000000 570570.000000 600600.000000 630630.000000 660660.000000 690690.000000 720720.000000 750750.000000 780780.000000 810810.000000 840840.000000 870870.000000 900900.000000 930930.000000 960960.000000 990990.000000 1021020.000000 1051050.000000 +0.000000 30660.000000 61320.000000 91980.000000 122640.000000 153300.000000 183960.000000 214620.000000 245280.000000 275940.000000 306600.000000 337260.000000 367920.000000 398580.000000 429240.000000 459900.000000 490560.000000 521220.000000 551880.000000 582540.000000 613200.000000 643860.000000 674520.000000 705180.000000 735840.000000 766500.000000 797160.000000 827820.000000 858480.000000 889140.000000 919800.000000 950460.000000 981120.000000 1011780.000000 1042440.000000 1073100.000000 +0.000000 31290.000000 62580.000000 93870.000000 125160.000000 156450.000000 187740.000000 219030.000000 250320.000000 281610.000000 312900.000000 344190.000000 375480.000000 406770.000000 438060.000000 469350.000000 500640.000000 531930.000000 563220.000000 594510.000000 625800.000000 657090.000000 688380.000000 719670.000000 750960.000000 782250.000000 813540.000000 844830.000000 876120.000000 907410.000000 938700.000000 969990.000000 1001280.000000 1032570.000000 1063860.000000 1095150.000000 +0.000000 31920.000000 63840.000000 95760.000000 127680.000000 159600.000000 191520.000000 223440.000000 255360.000000 287280.000000 319200.000000 351120.000000 383040.000000 414960.000000 446880.000000 478800.000000 510720.000000 542640.000000 574560.000000 606480.000000 638400.000000 670320.000000 
702240.000000 734160.000000 766080.000000 798000.000000 829920.000000 861840.000000 893760.000000 925680.000000 957600.000000 989520.000000 1021440.000000 1053360.000000 1085280.000000 1117200.000000 +0.000000 32550.000000 65100.000000 97650.000000 130200.000000 162750.000000 195300.000000 227850.000000 260400.000000 292950.000000 325500.000000 358050.000000 390600.000000 423150.000000 455700.000000 488250.000000 520800.000000 553350.000000 585900.000000 618450.000000 651000.000000 683550.000000 716100.000000 748650.000000 781200.000000 813750.000000 846300.000000 878850.000000 911400.000000 943950.000000 976500.000000 1009050.000000 1041600.000000 1074150.000000 1106700.000000 1139250.000000 +0.000000 33180.000000 66360.000000 99540.000000 132720.000000 165900.000000 199080.000000 232260.000000 265440.000000 298620.000000 331800.000000 364980.000000 398160.000000 431340.000000 464520.000000 497700.000000 530880.000000 564060.000000 597240.000000 630420.000000 663600.000000 696780.000000 729960.000000 763140.000000 796320.000000 829500.000000 862680.000000 895860.000000 929040.000000 962220.000000 995400.000000 1028580.000000 1061760.000000 1094940.000000 1128120.000000 1161300.000000 +0.000000 33810.000000 67620.000000 101430.000000 135240.000000 169050.000000 202860.000000 236670.000000 270480.000000 304290.000000 338100.000000 371910.000000 405720.000000 439530.000000 473340.000000 507150.000000 540960.000000 574770.000000 608580.000000 642390.000000 676200.000000 710010.000000 743820.000000 777630.000000 811440.000000 845250.000000 879060.000000 912870.000000 946680.000000 980490.000000 1014300.000000 1048110.000000 1081920.000000 1115730.000000 1149540.000000 1183350.000000 +0.000000 34440.000000 68880.000000 103320.000000 137760.000000 172200.000000 206640.000000 241080.000000 275520.000000 309960.000000 344400.000000 378840.000000 413280.000000 447720.000000 482160.000000 516600.000000 551040.000000 585480.000000 619920.000000 654360.000000 688800.000000 
723240.000000 757680.000000 792120.000000 826560.000000 861000.000000 895440.000000 929880.000000 964320.000000 998760.000000 1033200.000000 1067640.000000 1102080.000000 1136520.000000 1170960.000000 1205400.000000 +0.000000 35070.000000 70140.000000 105210.000000 140280.000000 175350.000000 210420.000000 245490.000000 280560.000000 315630.000000 350700.000000 385770.000000 420840.000000 455910.000000 490980.000000 526050.000000 561120.000000 596190.000000 631260.000000 666330.000000 701400.000000 736470.000000 771540.000000 806610.000000 841680.000000 876750.000000 911820.000000 946890.000000 981960.000000 1017030.000000 1052100.000000 1087170.000000 1122240.000000 1157310.000000 1192380.000000 1227450.000000 +0.000000 35700.000000 71400.000000 107100.000000 142800.000000 178500.000000 214200.000000 249900.000000 285600.000000 321300.000000 357000.000000 392700.000000 428400.000000 464100.000000 499800.000000 535500.000000 571200.000000 606900.000000 642600.000000 678300.000000 714000.000000 749700.000000 785400.000000 821100.000000 856800.000000 892500.000000 928200.000000 963900.000000 999600.000000 1035300.000000 1071000.000000 1106700.000000 1142400.000000 1178100.000000 1213800.000000 1249500.000000 +0.000000 36330.000000 72660.000000 108990.000000 145320.000000 181650.000000 217980.000000 254310.000000 290640.000000 326970.000000 363300.000000 399630.000000 435960.000000 472290.000000 508620.000000 544950.000000 581280.000000 617610.000000 653940.000000 690270.000000 726600.000000 762930.000000 799260.000000 835590.000000 871920.000000 908250.000000 944580.000000 980910.000000 1017240.000000 1053570.000000 1089900.000000 1126230.000000 1162560.000000 1198890.000000 1235220.000000 1271550.000000 +0.000000 36960.000000 73920.000000 110880.000000 147840.000000 184800.000000 221760.000000 258720.000000 295680.000000 332640.000000 369600.000000 406560.000000 443520.000000 480480.000000 517440.000000 554400.000000 591360.000000 628320.000000 665280.000000 
702240.000000 739200.000000 776160.000000 813120.000000 850080.000000 887040.000000 924000.000000 960960.000000 997920.000000 1034880.000000 1071840.000000 1108800.000000 1145760.000000 1182720.000000 1219680.000000 1256640.000000 1293600.000000 diff --git a/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop new file mode 100644 index 00000000000..009e895ee35 --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop @@ -0,0 +1,25 @@ +{ + "name": "do.body => do.end45", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_do_body2", + "domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }", + "schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, o1, i0, o3, 0, o5, i1, o7, 0, o9, i2, o11, 0] : 4o7 = o5 and 4o11 = o9 and 4o3 = o1 and o1 <= i0 and o1 >= -3 + i0 and o5 <= i1 and o5 >= -3 + i1 and o9 <= i2 and o9 >= -3 + i2 }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }] + }] +}
\ No newline at end of file diff --git a/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.invalid_reverse b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.invalid_reverse new file mode 100644 index 00000000000..0345beca7db --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.invalid_reverse @@ -0,0 +1,25 @@ +{ + "name": "do.body => do.end45", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_do_body2", + "domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }", + "schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, -i2, 0] }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }] + }] +}
\ No newline at end of file diff --git a/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.valid_reverse b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.valid_reverse new file mode 100644 index 00000000000..e7f01e07370 --- /dev/null +++ b/polly/test/CodeGen/do_pluto_matmult___%do.body---%do.end45.jscop.valid_reverse @@ -0,0 +1,25 @@ +{ + "name": "do.body => do.end45", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_do_body2", + "domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }", + "schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, -i0, 0, -i1, 0, i2, 0] }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }" + }] + }] +}
\ No newline at end of file diff --git a/polly/test/CodeGen/loop_with_condition.c b/polly/test/CodeGen/loop_with_condition.c new file mode 100644 index 00000000000..c27d211032f --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition.c @@ -0,0 +1,39 @@ +#include <string.h> +#define N 1024 +int A[N]; +int B[N]; + +void loop_with_condition() { + int i; + + __sync_synchronize(); + for (i = 0; i < N; i++) { + if (i <= N / 2) + A[i] = 1; + else + A[i] = 2; + B[i] = 3; + } + __sync_synchronize(); +} + +int main () { + int i; + + memset(A, 0, sizeof(int) * N); + memset(B, 0, sizeof(int) * N); + + loop_with_condition(); + + for (i = 0; i < N; i++) + if (B[i] != 3) + return 1; + + for (i = 0; i < N; i++) + if (i <= N / 2 && A[i] != 1) + return 1; + else if (i > N / 2 && A[i] != 2) + return 1; + return 0; +} + diff --git a/polly/test/CodeGen/loop_with_condition.ll b/polly/test/CodeGen/loop_with_condition.ll new file mode 100644 index 00000000000..356848864ac --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition.ll @@ -0,0 +1,142 @@ +; ModuleID = 'loop_with_condition.s' +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] +@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] + +define void @loop_with_condition() nounwind { +; <label>:0 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %1 + +; <label>:1 ; preds = %7, %0 + %indvar = phi i64 [ %indvar.next, %7 ], [ 0, %0 ] ; <i64> [#uses=5] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %scevgep1 = getelementptr [1024 x i32]* @B, i64 0, 
i64 %indvar ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1] + br i1 %exitcond, label %2, label %8 + +; <label>:2 ; preds = %1 + %3 = icmp sle i32 %i.0, 512 ; <i1> [#uses=1] + br i1 %3, label %4, label %5 + +; <label>:4 ; preds = %2 + store i32 1, i32* %scevgep + br label %6 + +; <label>:5 ; preds = %2 + store i32 2, i32* %scevgep + br label %6 + +; <label>:6 ; preds = %5, %4 + store i32 3, i32* %scevgep1 + br label %7 + +; <label>:7 ; preds = %6 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:8 ; preds = %1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +; <label>:0 + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @loop_with_condition() + br label %1 + +; <label>:1 ; preds = %8, %0 + %indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3] + %scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1] + %2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1] + br i1 %2, label %3, label %9 + +; <label>:3 ; preds = %1 + %4 = load i32* %scevgep3 ; <i32> [#uses=1] + %5 = icmp ne i32 %4, 3 ; <i1> [#uses=1] + br i1 %5, label %6, label %7 + +; <label>:6 ; preds = %3 + br label %28 + +; <label>:7 ; preds = %3 + br label %8 + +; <label>:8 ; preds = %7 + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:9 ; preds = %1 + br label %10 + +; <label>:10 ; preds = %26, %9 + %indvar = phi i64 [ %indvar.next, %26 ], [ 0, %9 ] ; <i64> [#uses=3] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %i.1 = trunc i64 %indvar 
to i32 ; <i32> [#uses=3] + %11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1] + br i1 %11, label %12, label %27 + +; <label>:12 ; preds = %10 + %13 = icmp sle i32 %i.1, 512 ; <i1> [#uses=1] + br i1 %13, label %14, label %18 + +; <label>:14 ; preds = %12 + %15 = load i32* %scevgep ; <i32> [#uses=1] + %16 = icmp ne i32 %15, 1 ; <i1> [#uses=1] + br i1 %16, label %17, label %18 + +; <label>:17 ; preds = %14 + br label %28 + +; <label>:18 ; preds = %14, %12 + %19 = icmp sgt i32 %i.1, 512 ; <i1> [#uses=1] + br i1 %19, label %20, label %24 + +; <label>:20 ; preds = %18 + %21 = load i32* %scevgep ; <i32> [#uses=1] + %22 = icmp ne i32 %21, 2 ; <i1> [#uses=1] + br i1 %22, label %23, label %24 + +; <label>:23 ; preds = %20 + br label %28 + +; <label>:24 ; preds = %20, %18 + br label %25 + +; <label>:25 ; preds = %24 + br label %26 + +; <label>:26 ; preds = %25 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %10 + +; <label>:27 ; preds = %10 + br label %28 + +; <label>:28 ; preds = %27, %23, %17, %6 + %.0 = phi i32 [ 1, %6 ], [ 1, %17 ], [ 1, %23 ], [ 0, %27 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: for (c2=0;c2<=512;c2++) { +; CHECK: Stmt_4(c2); +; CHECK: Stmt_6(c2); +; CHECK: } +; CHECK: for (c2=513;c2<=1023;c2++) { +; CHECK: Stmt_5(c2); +; CHECK: Stmt_6(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/loop_with_condition_2.ll b/polly/test/CodeGen/loop_with_condition_2.ll new file mode 100644 index 00000000000..cce3ab08dc1 --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition_2.ll @@ -0,0 +1,143 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen %s | lli + +; ModuleID = 'loop_with_condition_2.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = 
"x86_64-unknown-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 16 +@B = common global [1024 x i32] zeroinitializer, align 16 + +define void @loop_with_condition(i32 %m) nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %tmp = sub i32 0, %m + %tmp1 = zext i32 %tmp to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar + %arrayidx10 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar + %tmp2 = add i64 %tmp1, %indvar + %sub = trunc i64 %tmp2 to i32 + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %cmp3 = icmp sle i32 %sub, 1024 + br i1 %cmp3, label %if.then, label %if.else + +if.then: ; preds = %for.body + store i32 1, i32* %arrayidx + br label %if.end + +if.else: ; preds = %for.body + store i32 2, i32* %arrayidx + br label %if.end + +if.end: ; preds = %if.else, %if.then + store i32 3, i32* %arrayidx10 + br label %for.inc + +for.inc: ; preds = %if.end + %indvar.next = add i64 %indvar, 1 + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +entry: + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @loop_with_condition(i32 5) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 + %i.0 = trunc i64 %indvar1 to i32 + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + 
+for.body: ; preds = %for.cond + %tmp3 = load i32* %arrayidx + %cmp4 = icmp ne i32 %tmp3, 3 + br i1 %cmp4, label %if.then, label %if.end + +if.then: ; preds = %for.body + br label %return + +if.end: ; preds = %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %indvar.next2 = add i64 %indvar1, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond6 + +for.cond6: ; preds = %for.inc32, %for.end + %indvar = phi i64 [ %indvar.next, %for.inc32 ], [ 0, %for.end ] + %arrayidx15 = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar + %i.1 = trunc i64 %indvar to i32 + %cmp8 = icmp slt i32 %i.1, 1024 + br i1 %cmp8, label %for.body9, label %for.end35 + +for.body9: ; preds = %for.cond6 + br i1 true, label %land.lhs.true, label %if.else + +land.lhs.true: ; preds = %for.body9 + %tmp16 = load i32* %arrayidx15 + %cmp17 = icmp ne i32 %tmp16, 1 + br i1 %cmp17, label %if.then18, label %if.else + +if.then18: ; preds = %land.lhs.true + br label %return + +if.else: ; preds = %land.lhs.true, %for.body9 + br i1 false, label %land.lhs.true23, label %if.end30 + +land.lhs.true23: ; preds = %if.else + %tmp27 = load i32* %arrayidx15 + %cmp28 = icmp ne i32 %tmp27, 2 + br i1 %cmp28, label %if.then29, label %if.end30 + +if.then29: ; preds = %land.lhs.true23 + br label %return + +if.end30: ; preds = %land.lhs.true23, %if.else + br label %if.end31 + +if.end31: ; preds = %if.end30 + br label %for.inc32 + +for.inc32: ; preds = %if.end31 + %indvar.next = add i64 %indvar, 1 + br label %for.cond6 + +for.end35: ; preds = %for.cond6 + br label %return + +return: ; preds = %for.end35, %if.then29, %if.then18, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 1, %if.then18 ], [ 1, %if.then29 ], [ 0, %for.end35 ] + ret i32 %retval.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: for (c2=0;c2<=min(1023,M+1024);c2++) { +; CHECK: Stmt_if_then(c2); +; CHECK: Stmt_if_end(c2); +; CHECK: } +; CHECK: for (c2=max(0,M+1025);c2<=1023;c2++) { 
+; CHECK: Stmt_if_else(c2); +; CHECK: Stmt_if_end(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/loop_with_condition_ineq.c b/polly/test/CodeGen/loop_with_condition_ineq.c new file mode 100644 index 00000000000..9d61922b290 --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition_ineq.c @@ -0,0 +1,39 @@ +#include <string.h> +#define N 1024 +int A[N]; +int B[N]; + +void loop_with_condition_ineq() { + int i; + + __sync_synchronize(); + for (i = 0; i < N; i++) { + if (i != N / 2) + A[i] = 1; + else + A[i] = 2; + B[i] = 3; + } + __sync_synchronize(); +} + +int main () { + int i; + + memset(A, 0, sizeof(int) * N); + memset(B, 0, sizeof(int) * N); + + loop_with_condition_ineq(); + + for (i = 0; i < N; i++) + if (B[i] != 3) + return 1; + + for (i = 0; i < N; i++) + if (i != N / 2 && A[i] != 1) + return 1; + else if (i == N && A[i] != 2) + return 1; + return 0; +} + diff --git a/polly/test/CodeGen/loop_with_condition_ineq.ll b/polly/test/CodeGen/loop_with_condition_ineq.ll new file mode 100644 index 00000000000..3531cafa7dc --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition_ineq.ll @@ -0,0 +1,143 @@ +; ModuleID = 'loop_with_condition_ineq.s' +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] +@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] + +define void @loop_with_condition_ineq() nounwind { +; <label>:0 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %1 + +; <label>:1 ; preds = %7, %0 + %indvar = phi i64 [ %indvar.next, %7 ], [ 0, %0 ] ; <i64> [#uses=5] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, 
i64 %indvar ; <i32*> [#uses=2] + %scevgep1 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1] + br i1 %exitcond, label %2, label %8 + +; <label>:2 ; preds = %1 + %3 = icmp ne i32 %i.0, 512 ; <i1> [#uses=1] + br i1 %3, label %4, label %5 + +; <label>:4 ; preds = %2 + store i32 1, i32* %scevgep + br label %6 + +; <label>:5 ; preds = %2 + store i32 2, i32* %scevgep + br label %6 + +; <label>:6 ; preds = %5, %4 + store i32 3, i32* %scevgep1 + br label %7 + +; <label>:7 ; preds = %6 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:8 ; preds = %1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +; <label>:0 + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @loop_with_condition_ineq() + br label %1 + +; <label>:1 ; preds = %8, %0 + %indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3] + %scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1] + %2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1] + br i1 %2, label %3, label %9 + +; <label>:3 ; preds = %1 + %4 = load i32* %scevgep3 ; <i32> [#uses=1] + %5 = icmp ne i32 %4, 3 ; <i1> [#uses=1] + br i1 %5, label %6, label %7 + +; <label>:6 ; preds = %3 + br label %28 + +; <label>:7 ; preds = %3 + br label %8 + +; <label>:8 ; preds = %7 + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:9 ; preds = %1 + br label %10 + +; <label>:10 ; preds = %26, %9 + %indvar = phi i64 [ %indvar.next, %26 ], [ 0, %9 ] ; <i64> [#uses=3] + %scevgep = 
getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=3] + %11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1] + br i1 %11, label %12, label %27 + +; <label>:12 ; preds = %10 + %13 = icmp ne i32 %i.1, 512 ; <i1> [#uses=1] + br i1 %13, label %14, label %18 + +; <label>:14 ; preds = %12 + %15 = load i32* %scevgep ; <i32> [#uses=1] + %16 = icmp ne i32 %15, 1 ; <i1> [#uses=1] + br i1 %16, label %17, label %18 + +; <label>:17 ; preds = %14 + br label %28 + +; <label>:18 ; preds = %14, %12 + %19 = icmp eq i32 %i.1, 1024 ; <i1> [#uses=1] + br i1 %19, label %20, label %24 + +; <label>:20 ; preds = %18 + %21 = load i32* %scevgep ; <i32> [#uses=1] + %22 = icmp ne i32 %21, 2 ; <i1> [#uses=1] + br i1 %22, label %23, label %24 + +; <label>:23 ; preds = %20 + br label %28 + +; <label>:24 ; preds = %20, %18 + br label %25 + +; <label>:25 ; preds = %24 + br label %26 + +; <label>:26 ; preds = %25 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %10 + +; <label>:27 ; preds = %10 + br label %28 + +; <label>:28 ; preds = %27, %23, %17, %6 + %.0 = phi i32 [ 1, %6 ], [ 1, %17 ], [ 1, %23 ], [ 0, %27 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: for (c2=0;c2<=511;c2++) { +; CHECK: Stmt_4(c2); +; CHECK: Stmt_6(c2); +; CHECK: } +; CHECK: Stmt_5(512); +; CHECK: Stmt_6(512); +; CHECK: for (c2=513;c2<=1023;c2++) { +; CHECK: Stmt_4(c2); +; CHECK: Stmt_6(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/loop_with_condition_nested.c b/polly/test/CodeGen/loop_with_condition_nested.c new file mode 100644 index 00000000000..b3f743a8c38 --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition_nested.c @@ -0,0 +1,45 @@ +#include <string.h> +#define N 1024 +int A[N]; +int B[N]; + +void loop_with_condition() { + int i; + + __sync_synchronize(); + for (i = 0; i < N; i++) { + if (i <= N / 2) { + if (i > 20) + A[i] = 1; + else + A[i] = 2; + } + 
B[i] = 3; + } + __sync_synchronize(); +} + +int main () { + int i; + + memset(A, 0, sizeof(int) * N); + memset(B, 0, sizeof(int) * N); + + loop_with_condition(); + + for (i = 0; i < N; i++) + if (B[i] != 3) + return 1; + + for (i = 0; i < N; i++) + if (i <= N / 2 && i > 20 && A[i] != 1) + return 1; + else if (i > N / 2) { + if (i <= 20 && A[i] != 2) + return 1; + if (i > 20 && A[i] != 0) + return 1; + } + return 0; +} + diff --git a/polly/test/CodeGen/loop_with_condition_nested.ll b/polly/test/CodeGen/loop_with_condition_nested.ll new file mode 100644 index 00000000000..0a5f0cc4c18 --- /dev/null +++ b/polly/test/CodeGen/loop_with_condition_nested.ll @@ -0,0 +1,174 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli +; ModuleID = 'loop_with_condition_nested.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] +@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4] + +define void @loop_with_condition() nounwind { +; <label>:0 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %1 + +; <label>:1 ; preds = %10, %0 + %indvar = phi i64 [ %indvar.next, %10 ], [ 0, %0 ] ; <i64> [#uses=5] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %scevgep1 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=2] + %exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1] + br i1 %exitcond, label %2, label %11 + +; <label>:2 ; preds = %1 + %3 = icmp sle i32 %i.0, 512 ; <i1> [#uses=1] + br i1 %3, label %4, label %9 + +; <label>:4 ; preds = %2 + %5 = icmp sgt i32 %i.0, 20 ; <i1> [#uses=1] + br 
i1 %5, label %6, label %7 + +; <label>:6 ; preds = %4 + store i32 1, i32* %scevgep + br label %8 + +; <label>:7 ; preds = %4 + store i32 2, i32* %scevgep + br label %8 + +; <label>:8 ; preds = %7, %6 + br label %9 + +; <label>:9 ; preds = %8, %2 + store i32 3, i32* %scevgep1 + br label %10 + +; <label>:10 ; preds = %9 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:11 ; preds = %1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +; <label>:0 + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @loop_with_condition() + br label %1 + +; <label>:1 ; preds = %8, %0 + %indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3] + %scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1] + %2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1] + br i1 %2, label %3, label %9 + +; <label>:3 ; preds = %1 + %4 = load i32* %scevgep3 ; <i32> [#uses=1] + %5 = icmp ne i32 %4, 3 ; <i1> [#uses=1] + br i1 %5, label %6, label %7 + +; <label>:6 ; preds = %3 + br label %39 + +; <label>:7 ; preds = %3 + br label %8 + +; <label>:8 ; preds = %7 + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %1 + +; <label>:9 ; preds = %1 + br label %10 + +; <label>:10 ; preds = %37, %9 + %indvar = phi i64 [ %indvar.next, %37 ], [ 0, %9 ] ; <i64> [#uses=3] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=3] + %i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=6] + %11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1] + br i1 %11, label %12, label %38 + +; <label>:12 ; preds = %10 + %13 = icmp sle i32 %i.1, 512 ; <i1> [#uses=1] + br i1 %13, label 
%14, label %20 + +; <label>:14 ; preds = %12 + %15 = icmp sgt i32 %i.1, 20 ; <i1> [#uses=1] + br i1 %15, label %16, label %20 + +; <label>:16 ; preds = %14 + %17 = load i32* %scevgep ; <i32> [#uses=1] + %18 = icmp ne i32 %17, 1 ; <i1> [#uses=1] + br i1 %18, label %19, label %20 + +; <label>:19 ; preds = %16 + br label %39 + +; <label>:20 ; preds = %16, %14, %12 + %21 = icmp sgt i32 %i.1, 512 ; <i1> [#uses=1] + br i1 %21, label %22, label %35 + +; <label>:22 ; preds = %20 + %23 = icmp sle i32 %i.1, 20 ; <i1> [#uses=1] + br i1 %23, label %24, label %28 + +; <label>:24 ; preds = %22 + %25 = load i32* %scevgep ; <i32> [#uses=1] + %26 = icmp ne i32 %25, 2 ; <i1> [#uses=1] + br i1 %26, label %27, label %28 + +; <label>:27 ; preds = %24 + br label %39 + +; <label>:28 ; preds = %24, %22 + %29 = icmp sgt i32 %i.1, 20 ; <i1> [#uses=1] + br i1 %29, label %30, label %34 + +; <label>:30 ; preds = %28 + %31 = load i32* %scevgep ; <i32> [#uses=1] + %32 = icmp ne i32 %31, 0 ; <i1> [#uses=1] + br i1 %32, label %33, label %34 + +; <label>:33 ; preds = %30 + br label %39 + +; <label>:34 ; preds = %30, %28 + br label %35 + +; <label>:35 ; preds = %34, %20 + br label %36 + +; <label>:36 ; preds = %35 + br label %37 + +; <label>:37 ; preds = %36 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %10 + +; <label>:38 ; preds = %10 + br label %39 + +; <label>:39 ; preds = %38, %33, %27, %19, %6 + %.0 = phi i32 [ 1, %6 ], [ 1, %19 ], [ 1, %27 ], [ 1, %33 ], [ 0, %38 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: for (c2=0;c2<=20;c2++) { +; CHECK: Stmt_7(c2); +; CHECK: Stmt_9(c2); +; CHECK: } +; CHECK: for (c2=21;c2<=512;c2++) { +; CHECK: Stmt_6(c2); +; CHECK: Stmt_9(c2); +; CHECK: } +; CHECK: for (c2=513;c2<=1023;c2++) { +; CHECK: Stmt_9(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/matmul_vec.c b/polly/test/CodeGen/matmul_vec.c new file mode 100644 index 00000000000..ade07d079bf --- 
/dev/null +++ b/polly/test/CodeGen/matmul_vec.c @@ -0,0 +1,46 @@ +#define M 1024 +#define N 1024 +#define K 1024 +float A[K][M]; +float B[N][K]; +float C[M][N]; +/* +void matmul_vec(void) { + int i, j, k; + + + /* With much unrolling + for (i=0;i<=M;i++) + for (j=0;j<=N;j+=4) + for (k=0;k<=K;k+=8) + for (kk=k;kk<=k+7;kk++) + for (jj=j;jj<=j+3;jj++) + C[i][jj] += A[kk][i] * B[jj][kk]; + vec_load splat scalar_load + */ + /* Without unrolling + for (i=0;i<=M;i++) + for (j=0;j<=N;j+=4) + for (k=0;k<=K;k++) + for (jj=j;jj<=j+3;jj++) + C[i][jj] += A[k][i] * B[jj][kk]; + vec_load splat scalar_load + / + +} +i*/ +int main() +{ + int i, j, k; + //matmul_vec(); + for(i=0; i<M/4; i++) + for(k=0; k<K; k++) { + for(j=0; j<N; j++) + C[i+0][j] += A[k][i+0] * B[j][k]; + C[i+1][j] += A[k][i+1] * B[j][k]; + C[i+2][j] += A[k][i+2] * B[j][k]; + C[i+3][j] += A[k][i+3] * B[j][k]; + } + + return A[42][42]; +} diff --git a/polly/test/CodeGen/matmul_vec.ll b/polly/test/CodeGen/matmul_vec.ll new file mode 100644 index 00000000000..f08915ce302 --- /dev/null +++ b/polly/test/CodeGen/matmul_vec.ll @@ -0,0 +1,92 @@ +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-codegen -enable-polly-vector -S -dce %s | FileCheck %s + +; ModuleID = 'matmul_vec.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x [1024 x float]] zeroinitializer, align 16 +@B = common global [1024 x [1024 x float]] zeroinitializer, align 16 +@C = common global [1024 x [1024 x float]] zeroinitializer, align 16 + +define void @matmul_vec() nounwind { +; <label>:0 + br label %1 + +; <label>:1 ; preds = %16, %0 + %indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ] + %exitcond9 = icmp ne i64 %indvar3, 1024 + br i1 %exitcond9, label %2, label %17 + +; <label>:2 ; preds = %1 + br label %3 + +; 
<label>:3 ; preds = %14, %2 + %indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ] + %scevgep8 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1 + %exitcond6 = icmp ne i64 %indvar1, 1024 + br i1 %exitcond6, label %4, label %15 + +; <label>:4 ; preds = %3 + br label %5 + +; <label>:5 ; preds = %12, %4 + %indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ] + %scevgep5 = getelementptr [1024 x [1024 x float]]* @A, i64 0, i64 %indvar, i64 %indvar3 + %scevgep = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar1, i64 %indvar + %exitcond = icmp ne i64 %indvar, 1024 + br i1 %exitcond, label %6, label %13 + +; <label>:6 ; preds = %5 + %7 = load float* %scevgep5, align 4 + %8 = load float* %scevgep, align 4 + %9 = fmul float %7, %8 + %10 = load float* %scevgep8, align 4 + %11 = fadd float %10, %9 + store float %11, float* %scevgep8, align 4 + br label %12 + +; <label>:12 ; preds = %6 + %indvar.next = add i64 %indvar, 1 + br label %5 + +; <label>:13 ; preds = %5 + br label %14 + +; <label>:14 ; preds = %13 + %indvar.next2 = add i64 %indvar1, 1 + br label %3 + +; <label>:15 ; preds = %3 + br label %16 + +; <label>:16 ; preds = %15 + %indvar.next4 = add i64 %indvar3, 1 + br label %1 + +; <label>:17 ; preds = %1 + ret void +} + +define i32 @main() nounwind { + call void @matmul_vec() + %1 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @A, i64 0, i64 42, i64 42), align 8 + %2 = fptosi float %1 to i32 + ret i32 %2 +} + +; CHECK: load <1 x float>* +; CHECK: shufflevector <1 x float> +; CHECK: load float* +; CHECK: insertelement <4 x float> +; CHECK: load float* +; CHECK: insertelement <4 x float> +; CHECK: load float* +; CHECK: insertelement <4 x float> +; CHECK: load float* +; CHECK: insertelement <4 x float> +; CHECK: fmul <4 x float> +; CHECK: bitcast float* +; CHECK: load <4 x float>* +; CHECK: fadd <4 x float> +; CHECK: bitcast float* +; CHECK: store <4 x float> diff --git a/polly/test/CodeGen/matmul_vec___%1---%17.jscop 
b/polly/test/CodeGen/matmul_vec___%1---%17.jscop new file mode 100644 index 00000000000..a5b16125c7e --- /dev/null +++ b/polly/test/CodeGen/matmul_vec___%1---%17.jscop @@ -0,0 +1,25 @@ +{ + "name": "%1 => %17", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_6", + "domain": "{ Stmt_6[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }", + "schedule": "{ Stmt_6[i0, i2, i1] -> scattering[i0, i1, o, i2] : exists (e0 = [(o)/4]: 4e0 = o and o <= i2 and o >= -3 + i2) }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_6[i0, i1, i2] -> MemRef_A[i0 + 1024i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_6[i0, i1, i2] -> MemRef_B[1024i1 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_6[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }, + { + "kind": "write", + "relation": "{ Stmt_6[i0, i1, i2] -> MemRef_C[1024i0 + i1] }" + }] + }] +} diff --git a/polly/test/CodeGen/pluto_matmult.c b/polly/test/CodeGen/pluto_matmult.c new file mode 100644 index 00000000000..898612fab6a --- /dev/null +++ b/polly/test/CodeGen/pluto_matmult.c @@ -0,0 +1,36 @@ +#define M 2048 +#define N 2048 +#define K 2048 +#define alpha 1 +#define beta 1 +double A[M][K+13]; +double B[K][N+13]; +double C[M][N+13]; + +void init_array(); +void print_array(); + +void pluto_matmult(void) { + int i, j, k; + + __sync_synchronize(); + for(i=0; i<M; i++) + for(j=0; j<N; j++) + for(k=0; k<K; k++) + C[i][j] = beta*C[i][j] + alpha*A[i][k] * B[k][j]; + __sync_synchronize(); +} + +int main() +{ + register double s; + + init_array(); + +#pragma scop + pluto_matmult(); +#pragma endscop + print_array(); + + return 0; +} diff --git a/polly/test/CodeGen/pluto_matmult.ll b/polly/test/CodeGen/pluto_matmult.ll new file mode 100644 index 00000000000..06db747d105 --- /dev/null +++ b/polly/test/CodeGen/pluto_matmult.ll @@ -0,0 +1,113 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < 
%s > /dev/null +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -analyze -S < %s | FileCheck -check-prefix=IMPORT %s +; ModuleID = 'pluto-matmul.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@C = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2] +@A = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2] +@B = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2] + +define void @pluto_matmult() nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc44, %entry + %indvar3 = phi i64 [ %indvar.next4, %for.inc44 ], [ 0, %entry ] ; <i64> [#uses=4] + %exitcond6 = icmp ne i64 %indvar3, 2048 ; <i1> [#uses=1] + br i1 %exitcond6, label %for.body, label %for.end47 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc40, %for.body + %indvar1 = phi i64 [ %indvar.next2, %for.inc40 ], [ 0, %for.body ] ; <i64> [#uses=4] + %arrayidx12 = getelementptr [2048 x [2061 x double]]* @C, i64 0, i64 %indvar3, i64 %indvar1 ; <double*> [#uses=2] + %exitcond5 = icmp ne i64 %indvar1, 2048 ; <i1> [#uses=1] + br i1 %exitcond5, label %for.body4, label %for.end43 + +for.body4: ; preds = %for.cond1 + br label %for.cond5 + +for.cond5: ; preds = %for.inc, %for.body4 + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body4 ] ; <i64> [#uses=4] + %arrayidx20 = getelementptr [2048 x [2061 x double]]* @A, i64 0, i64 %indvar3, i64 %indvar ; <double*> [#uses=1] + %arrayidx29 = getelementptr [2048 x [2061 x double]]* @B, i64 0, i64 %indvar, i64 %indvar1 ; <double*> [#uses=1] + %exitcond = icmp ne i64 
%indvar, 2048 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body8, label %for.end + +for.body8: ; preds = %for.cond5 + %tmp13 = load double* %arrayidx12 ; <double> [#uses=1] + %mul = fmul double 1.000000e+00, %tmp13 ; <double> [#uses=1] + %tmp21 = load double* %arrayidx20 ; <double> [#uses=1] + %mul22 = fmul double 1.000000e+00, %tmp21 ; <double> [#uses=1] + %tmp30 = load double* %arrayidx29 ; <double> [#uses=1] + %mul31 = fmul double %mul22, %tmp30 ; <double> [#uses=1] + %add = fadd double %mul, %mul31 ; <double> [#uses=1] + store double %add, double* %arrayidx12 + br label %for.inc + +for.inc: ; preds = %for.body8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond5 + +for.end: ; preds = %for.cond5 + br label %for.inc40 + +for.inc40: ; preds = %for.end + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond1 + +for.end43: ; preds = %for.cond1 + br label %for.inc44 + +for.inc44: ; preds = %for.end43 + %indvar.next4 = add i64 %indvar3, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end47: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +entry: + call void (...)* @init_array() + call void @pluto_matmult() + call void (...)* @print_array() + ret i32 0 +} + +declare void @init_array(...) + +declare void @print_array(...) +; CHECK: for (c2=0;c2<=2047;c2++) { +; CHECK: for (c4=0;c4<=2047;c4++) { +; CHECK: for (c6=0;c6<=2047;c6++) { +; CHECK: Stmt_for_body8(c2,c4,c6); +; CHECK: } +; CHECK: } +; CHECK: } + + +; Do not dump the complete CLooG output. New CLooG version optimize more +; in this test case. 
+; IMPORT: for (c2=0;c2<=2047;c2+=64) { +; IMPORT: c3<=min(2047,c2+63);c3++) { +; IMPORT: for (c6=0;c6<=2047;c6+=64) { +; IMPORT: c7<=min(2047,c6+63);c7++) { +; IMPORT: for (c10=0;c10<=2047;c10+=64) { +; IMPORT: c11<=min(2047,c10+63);c11++) +; IMPORT: { +; IMPORT: Stmt_for_body8(c3,c7,c11); +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } +; IMPORT: } + diff --git a/polly/test/CodeGen/pluto_matmult___%for.cond---%for.end47.jscop b/polly/test/CodeGen/pluto_matmult___%for.cond---%for.end47.jscop new file mode 100644 index 00000000000..54b32963136 --- /dev/null +++ b/polly/test/CodeGen/pluto_matmult___%for.cond---%for.end47.jscop @@ -0,0 +1,25 @@ +{ + "name": "for.cond => for.end47", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_for_body8", + "domain": "{ Stmt_for_body8[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 2047 and i2 >= 0 and i2 <= 2047 }", + "schedule": "{ Stmt_for_body8[i0, i1, i2] -> scattering[0, o1, i0, o3, 0, o5, i1, o7, 0, o9, i2, o11, 0] : 64o7 = o5 and 64o11 = o9 and 64o3 = o1 and o1 <= i0 and o1 >= -63 + i0 and o5 <= i1 and o5 >= -63 + i1 and o9 <= i2 and o9 >= -63 + i2 }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[2061i0 + i1] }" + }, + { + "kind": "read", + "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[2061i0 + i2] }" + }, + { + "kind": "read", + "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i1 + 2061i2] }" + }, + { + "kind": "write", + "relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[2061i0 + i1] }" + }] + }] +}
\ No newline at end of file diff --git a/polly/test/CodeGen/reduction.c b/polly/test/CodeGen/reduction.c new file mode 100644 index 00000000000..73a5e1e5381 --- /dev/null +++ b/polly/test/CodeGen/reduction.c @@ -0,0 +1,27 @@ +#include <string.h> +#include <stdio.h> +#define N 1021 + +int main () { + int i; + int A[N]; + int red; + + memset(A, 0, sizeof(int) * N); + + A[0] = 1; + A[1] = 1; + red = 0; + + __sync_synchronize(); + + for (i = 2; i < N; i++) { + A[i] = A[i-1] + A[i-2]; + red += A[i-2]; + } + + __sync_synchronize(); + + if (red != 382399368) + return 1; +} diff --git a/polly/test/CodeGen/reduction.ll b/polly/test/CodeGen/reduction.ll new file mode 100644 index 00000000000..2efb8f263da --- /dev/null +++ b/polly/test/CodeGen/reduction.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -S < %s 2>&1 | not FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen | lli +; XFAIL: * +; ModuleID = 'reduction.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +define i32 @main() nounwind { +; <label>:0 + %A = alloca [1021 x i32], align 16 ; <[1021 x i32]*> [#uses=6] + %1 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %2 = bitcast i32* %1 to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 4084, i32 1, i1 false) + %3 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %4 = getelementptr inbounds i32* %3, i64 0 ; <i32*> [#uses=1] + store i32 1, i32* %4 + %5 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %6 = getelementptr inbounds i32* %5, i64 1 ; <i32*> [#uses=1] + store i32 1, i32* %6 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %7 + +; <label>:7 ; preds = %14, %0 + %indvar = phi i64 [ %indvar.next, %14 ], [ 0, %0 ] ; <i64> 
[#uses=5] + %red.0 = phi i32 [ 0, %0 ], [ %13, %14 ] ; <i32> [#uses=2] + %scevgep = getelementptr [1021 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %tmp = add i64 %indvar, 2 ; <i64> [#uses=1] + %scevgep1 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp ; <i32*> [#uses=1] + %tmp2 = add i64 %indvar, 1 ; <i64> [#uses=1] + %scevgep3 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp2 ; <i32*> [#uses=1] + %exitcond = icmp ne i64 %indvar, 1019 ; <i1> [#uses=1] + br i1 %exitcond, label %8, label %15 + +; <label>:8 ; preds = %7 + %9 = load i32* %scevgep3 ; <i32> [#uses=1] + %10 = load i32* %scevgep ; <i32> [#uses=1] + %11 = add nsw i32 %9, %10 ; <i32> [#uses=1] + store i32 %11, i32* %scevgep1 + %12 = load i32* %scevgep ; <i32> [#uses=1] + %13 = add nsw i32 %red.0, %12 ; <i32> [#uses=1] + br label %14 + +; <label>:14 ; preds = %8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %7 + +; <label>:15 ; preds = %7 + %red.0.lcssa = phi i32 [ %red.0, %7 ] ; <i32> [#uses=1] + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %16 = icmp ne i32 %red.0.lcssa, 382399368 ; <i1> [#uses=1] + br i1 %16, label %17, label %18 + +; <label>:17 ; preds = %15 + br label %18 + +; <label>:18 ; preds = %17, %15 + %.0 = phi i32 [ 1, %17 ], [ 0, %15 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK: Could not generate independent blocks diff --git a/polly/test/CodeGen/reduction_2.c b/polly/test/CodeGen/reduction_2.c new file mode 100644 index 00000000000..ec7173f1761 --- /dev/null +++ b/polly/test/CodeGen/reduction_2.c @@ -0,0 +1,23 @@ +#include <string.h> +#include <stdio.h> +#define N 1021 + +int main () { + int i; + int A[N]; + int RED[1]; + + memset(A, 0, sizeof(int) * N); + + A[0] = 1; + A[1] = 1; + RED[0] = 0; + + for (i = 2; i < N; i++) { + A[i] = A[i-1] + A[i-2]; + RED[0] += A[i-2]; + } + + if (RED[0] 
!= 382399368) + return 1; +} diff --git a/polly/test/CodeGen/reduction_2.ll b/polly/test/CodeGen/reduction_2.ll new file mode 100644 index 00000000000..37987420be1 --- /dev/null +++ b/polly/test/CodeGen/reduction_2.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli +; ModuleID = 'reduction_2.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [1021 x i32], align 4 ; <[1021 x i32]*> [#uses=6] + %RED = alloca [1 x i32], align 4 ; <[1 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %conv = bitcast i32* %arraydecay to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %conv, i8 0, i64 4084, i32 1, i1 false) + %arraydecay1 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay1, i64 0 ; <i32*> [#uses=1] + store i32 1, i32* %arrayidx + %arraydecay2 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx3 = getelementptr inbounds i32* %arraydecay2, i64 1 ; <i32*> [#uses=1] + store i32 1, i32* %arrayidx3 + %arraydecay4 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx5 = getelementptr inbounds i32* %arraydecay4, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx5 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] ; <i64> [#uses=5] + %arrayidx15 = getelementptr [1021 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %tmp = add i64 %indvar, 2 ; <i64> [#uses=1] + %arrayidx20 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp ; <i32*> [#uses=1] + %tmp1 = add i64 %indvar, 1 ; 
<i64> [#uses=1] + %arrayidx9 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp1 ; <i32*> [#uses=1] + %exitcond = icmp ne i64 %indvar, 1019 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp10 = load i32* %arrayidx9 ; <i32> [#uses=1] + %tmp16 = load i32* %arrayidx15 ; <i32> [#uses=1] + %add = add nsw i32 %tmp10, %tmp16 ; <i32> [#uses=1] + store i32 %add, i32* %arrayidx20 + %tmp26 = load i32* %arrayidx15 ; <i32> [#uses=1] + %arraydecay27 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx28 = getelementptr inbounds i32* %arraydecay27, i64 0 ; <i32*> [#uses=2] + %tmp29 = load i32* %arrayidx28 ; <i32> [#uses=1] + %add30 = add nsw i32 %tmp29, %tmp26 ; <i32> [#uses=1] + store i32 %add30, i32* %arrayidx28 + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + %arraydecay32 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx33 = getelementptr inbounds i32* %arraydecay32, i64 0 ; <i32*> [#uses=1] + %tmp34 = load i32* %arrayidx33 ; <i32> [#uses=1] + %cmp35 = icmp ne i32 %tmp34, 382399368 ; <i1> [#uses=1] + br i1 %cmp35, label %if.then, label %if.end + +if.then: ; preds = %for.end + br label %if.end + +if.end: ; preds = %if.then, %for.end + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %for.end ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: for (c2=0;c2<=1018;c2++) { +; CHECK: Stmt_for_body(c2); +; CHECK: } diff --git a/polly/test/CodeGen/scev.ll b/polly/test/CodeGen/scev.ll new file mode 100644 index 00000000000..48211275b17 --- /dev/null +++ b/polly/test/CodeGen/scev.ll @@ -0,0 +1,23 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect < %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define fastcc void @f () inlinehint align 2 { +entry: + %0 = fmul double undef, 1.250000e+00 ; <double> [#uses=1] + %1 = fptoui double %0 to i32 ; <i32> [#uses=0] + br i1 false, label %bb5.i, label %bb.nph.i + +bb.nph.i: ; preds = %bb.i1 + br label %bb3.i2 + +bb3.i2: ; preds = %bb3.i2, %bb.nph.i + br i1 undef, label %bb3.i2, label %bb5.i + +bb5.i: ; preds = %bb3.i2, %bb.i1 + br label %exit + +exit: + ret void +} diff --git a/polly/test/CodeGen/sequential_loops.c b/polly/test/CodeGen/sequential_loops.c new file mode 100644 index 00000000000..4eac4b0f8ef --- /dev/null +++ b/polly/test/CodeGen/sequential_loops.c @@ -0,0 +1,31 @@ +#include <string.h> +#define N 1024 + +int A[N]; + +void sequential_loops() { + int i; + for (i = 0; i < N/2; i++) { + A[i] = 1; + } + for (i = N/2 ; i < N; i++) { + A[i] = 2; + } +} + +int main () { + int i; + memset(A, 0, sizeof(int) * N); + + sequential_loops(); + + for (i = 0; i < N; i++) { + if (A[i] != 1 && i < N/2) + return 1; + if (A[i] != 2 && i >= N/2) + return 1; + } + + return 0; +} + diff --git a/polly/test/CodeGen/sequential_loops.ll b/polly/test/CodeGen/sequential_loops.ll new file mode 100644 index 00000000000..15799c74004 --- /dev/null +++ b/polly/test/CodeGen/sequential_loops.ll @@ -0,0 +1,108 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli - +; ModuleID = 'sequential_loops.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 4 ; <[1024 x i32]*> [#uses=5] + +define void @sequential_loops() nounwind { +bb: + br label %bb1 + +bb1: ; preds = 
%bb3, %bb + %indvar1 = phi i64 [ %indvar.next2, %bb3 ], [ 0, %bb ] + %scevgep4 = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar1 + %exitcond3 = icmp ne i64 %indvar1, 512 + br i1 %exitcond3, label %bb2, label %bb4 + +bb2: ; preds = %bb1 + store i32 1, i32* %scevgep4 + br label %bb3 + +bb3: ; preds = %bb2 + %indvar.next2 = add i64 %indvar1, 1 + br label %bb1 + +bb4: ; preds = %bb1 + br label %bb5 + +bb5: ; preds = %bb7, %bb4 + %indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb4 ] + %tmp = add i64 %indvar, 512 + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %tmp + %exitcond = icmp ne i64 %indvar, 512 + br i1 %exitcond, label %bb6, label %bb8 + +bb6: ; preds = %bb5 + store i32 2, i32* %scevgep + br label %bb7 + +bb7: ; preds = %bb6 + %indvar.next = add i64 %indvar, 1 + br label %bb5 + +bb8: ; preds = %bb5 + ret void +} + +define i32 @main() nounwind { +bb: + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @sequential_loops() + br label %bb1 + +bb1: ; preds = %bb15, %bb + %indvar = phi i64 [ %indvar.next, %bb15 ], [ 0, %bb ] + %i.0 = trunc i64 %indvar to i32 + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar + %tmp = icmp slt i32 %i.0, 1024 + br i1 %tmp, label %bb2, label %bb16 + +bb2: ; preds = %bb1 + %tmp3 = load i32* %scevgep + %tmp4 = icmp ne i32 %tmp3, 1 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb2 + %tmp6 = icmp slt i32 %i.0, 512 + br i1 %tmp6, label %bb7, label %bb8 + +bb7: ; preds = %bb5 + br label %bb17 + +bb8: ; preds = %bb5, %bb2 + %tmp9 = load i32* %scevgep + %tmp10 = icmp ne i32 %tmp9, 2 + br i1 %tmp10, label %bb11, label %bb14 + +bb11: ; preds = %bb8 + %tmp12 = icmp sge i32 %i.0, 512 + br i1 %tmp12, label %bb13, label %bb14 + +bb13: ; preds = %bb11 + br label %bb17 + +bb14: ; preds = %bb11, %bb8 + br label %bb15 + +bb15: ; preds = %bb14 + %indvar.next = add i64 %indvar, 1 + br label %bb1 + +bb16: ; preds = %bb1 + br label %bb17 + +bb17: ; preds = 
%bb16, %bb13, %bb7 + %.0 = phi i32 [ 1, %bb7 ], [ 1, %bb13 ], [ 0, %bb16 ] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +; CHECK: for (c2=0;c2<=511;c2++) { +; CHECK: Stmt_bb2(c2); +; CHECK: } +; CHECK: for (c2=0;c2<=511;c2++) { +; CHECK: Stmt_bb6(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/simple_vec_assign_scalar.c b/polly/test/CodeGen/simple_vec_assign_scalar.c new file mode 100644 index 00000000000..1d1eb0a0b4b --- /dev/null +++ b/polly/test/CodeGen/simple_vec_assign_scalar.c @@ -0,0 +1,15 @@ +#define N 1024 +float A[N]; +float B[N]; + +void simple_vec_const(void) { + int i; + + for (i = 0; i < 4; i++) + B[i] = A[i] + 1; +} +int main() +{ + simple_vec_const(); + return A[42]; +} diff --git a/polly/test/CodeGen/simple_vec_assign_scalar.ll b/polly/test/CodeGen/simple_vec_assign_scalar.ll new file mode 100644 index 00000000000..fcde23bce85 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_assign_scalar.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +; ModuleID = 'simple_vec_assign_scalar.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_const() nounwind { +bb: + br label %bb2 + +bb2: ; preds = %bb5, %bb + %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ] + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %bb3, label %bb6 + +bb3: ; preds = %bb2 + %tmp = load float* %scevgep1, align 4 + %tmp4 = fadd float %tmp, 1.000000e+00 + store float %tmp4, float* %scevgep, align 4 + br label %bb5 + 
+bb5: ; preds = %bb3 + %indvar.next = add i64 %indvar, 1 + br label %bb2 + +bb6: ; preds = %bb2 + ret void +} + +define i32 @main() nounwind { +bb: + call void @simple_vec_const() + %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %tmp1 = fptosi float %tmp to i32 + ret i32 %tmp1 +} + +; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> + diff --git a/polly/test/CodeGen/simple_vec_assign_scalar_2.c b/polly/test/CodeGen/simple_vec_assign_scalar_2.c new file mode 100644 index 00000000000..ace959f7ffa --- /dev/null +++ b/polly/test/CodeGen/simple_vec_assign_scalar_2.c @@ -0,0 +1,15 @@ +#define N 1024 +float A[N]; +float B[N]; + +void simple_vec_const(void) { + int i; + + for (i = 0; i < 4; i++) + B[i] = A[i] + i; +} +int main() +{ + simple_vec_const(); + return A[42]; +} diff --git a/polly/test/CodeGen/simple_vec_assign_scalar_2.ll b/polly/test/CodeGen/simple_vec_assign_scalar_2.ll new file mode 100644 index 00000000000..c8ce6b54c4c --- /dev/null +++ b/polly/test/CodeGen/simple_vec_assign_scalar_2.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +; ModuleID = 'simple_vec_assign_scalar_2.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_const() nounwind { +bb: + br label %bb2 + +bb2: ; preds = %bb6, %bb + %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ] + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %i.0 = trunc i64 %indvar to i32 + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 
%exitcond, label %bb3, label %bb7 + +bb3: ; preds = %bb2 + %tmp = load float* %scevgep1, align 4 + %tmp4 = sitofp i32 %i.0 to float + %tmp5 = fadd float %tmp, %tmp4 + store float %tmp5, float* %scevgep, align 4 + br label %bb6 + +bb6: ; preds = %bb3 + %indvar.next = add i64 %indvar, 1 + br label %bb2 + +bb7: ; preds = %bb2 + ret void +} + +define i32 @main() nounwind { +bb: + call void @simple_vec_const() + %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %tmp1 = fptosi float %tmp to i32 + ret i32 %tmp1 +} + +; CHECK: insertelement <4 x float> undef, float %p_tmp4, i32 0 +; CHECK: shufflevector <4 x float> %0, <4 x float> %0, <4 x i32> zeroinitializer +; CHECK: fadd <4 x float> %tmp_p_vec_full, %1 + diff --git a/polly/test/CodeGen/simple_vec_const.c b/polly/test/CodeGen/simple_vec_const.c new file mode 100644 index 00000000000..e75c71c8b35 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_const.c @@ -0,0 +1,15 @@ +#define N 1024 +float A[N]; +float B[N]; + +void simple_vec_const(void) { + int i; + + for (i = 0; i < 4; i++) + B[i] = A[0]; +} +int main() +{ + simple_vec_const(); + return A[42]; +} diff --git a/polly/test/CodeGen/simple_vec_const.ll b/polly/test/CodeGen/simple_vec_const.ll new file mode 100644 index 00000000000..f290084b1f4 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_const.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -S %s | FileCheck %s + +; ModuleID = 'simple_vec_const.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_const() nounwind { +; <label>:0 + br label %1 + +; <label>:1 ; preds = %4, %0 + %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ] + 
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %2, label %5 + +; <label>:2 ; preds = %1 + %3 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16 + store float %3, float* %scevgep, align 4 + br label %4 + +; <label>:4 ; preds = %2 + %indvar.next = add i64 %indvar, 1 + br label %1 + +; <label>:5 ; preds = %1 + ret void +} + +define i32 @main() nounwind { + call void @simple_vec_const() + %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %2 = fptosi float %1 to i32 + ret i32 %2 +} + + +; CHECK: load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*) +; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer + diff --git a/polly/test/CodeGen/simple_vec_large_width.ll b/polly/test/CodeGen/simple_vec_large_width.ll new file mode 100644 index 00000000000..fe7f7df4095 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_large_width.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_large_width() nounwind { +; <label>:0 + br label %1 + +; <label>:1 ; preds = %4, %0 + %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ] + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 15 + br i1 %exitcond, label %2, label %5 + +; <label>:2 ; preds = %1 + %3 = load float* %scevgep1, align 4 + store float %3, float* %scevgep, align 4 + br label %4 + +; <label>:4 ; preds = %2 + 
%indvar.next = add i64 %indvar, 1 + br label %1 + +; <label>:5 ; preds = %1 + ret void +} + +define i32 @main() nounwind { + call void @simple_vec_large_width() + %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %2 = fptosi float %1 to i32 + ret i32 %2 +} + +; CHECK: bitcast float* {{.*}} to <15 x float>* +; CHECK: load <15 x float>* +; CHECK: store <15 x float> %_p_vec_full, <15 x float>* diff --git a/polly/test/CodeGen/simple_vec_stride_one.c b/polly/test/CodeGen/simple_vec_stride_one.c new file mode 100644 index 00000000000..720563ef630 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_stride_one.c @@ -0,0 +1,15 @@ +#define N 1024 +float A[N]; +float B[N]; + +void simple_vec_stride_one(void) { + int i; + + for (i = 0; i < 4; i++) + B[i] = A[i]; +} +int main() +{ + simple_vec_stride_one(); + return A[42]; +} diff --git a/polly/test/CodeGen/simple_vec_stride_one.ll b/polly/test/CodeGen/simple_vec_stride_one.ll new file mode 100644 index 00000000000..57d55045352 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_stride_one.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-cloog -analyze %s | FileCheck -check-prefix=IMPORT %s +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=`dirname %s` -polly-codegen %s -S -enable-polly-vector | FileCheck -check-prefix=CODEGEN %s +; ModuleID = 'simple_vec_stride_one.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_stride_one() nounwind { +; <label>:0 + br label %1 + +; <label>:1 ; preds = %4, 
%0 + %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ] + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %2, label %5 + +; <label>:2 ; preds = %1 + %3 = load float* %scevgep1, align 4 + store float %3, float* %scevgep, align 4 + br label %4 + +; <label>:4 ; preds = %2 + %indvar.next = add i64 %indvar, 1 + br label %1 + +; <label>:5 ; preds = %1 + ret void +} + +define i32 @main() nounwind { + call void @simple_vec_stride_one() + %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %2 = fptosi float %1 to i32 + ret i32 %2 +} + +; CHECK: bitcast float* {{.*}} to <4 x float>* +; CHECK: load <4 x float>* +; CHECK: store <4 x float> %_p_vec_full, <4 x float>* %vector_ptr + +; IMPORT: for (c2=0;c2<=12;c2+=4) { +; IMPORT: Stmt_2(c2/4); +; IMPORT: } + +; We do not generate optimal loads for this. +; CODEGEN: <4 x float> + diff --git a/polly/test/CodeGen/simple_vec_stride_one___%1---%5.jscop b/polly/test/CodeGen/simple_vec_stride_one___%1---%5.jscop new file mode 100644 index 00000000000..dec45ba8b39 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_stride_one___%1---%5.jscop @@ -0,0 +1,17 @@ +{ + "name": "%1 => %5", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_2", + "domain": "{ Stmt_2[i0] : i0 >= 0 and i0 <= 3 }", + "schedule": "{ Stmt_2[i0] -> scattering[0, 4i0, 0] }", + "accesses": [{ + "kind": "read", + "relation": "{ Stmt_2[i0] -> MemRef_A[i0] }" + }, + { + "kind": "write", + "relation": "{ Stmt_2[i0] -> MemRef_B[i0] }" + }] + }] +} diff --git a/polly/test/CodeGen/simple_vec_stride_x.c b/polly/test/CodeGen/simple_vec_stride_x.c new file mode 100644 index 00000000000..b6623bf79fa --- /dev/null +++ b/polly/test/CodeGen/simple_vec_stride_x.c @@ -0,0 +1,15 @@ +#define N 1024 +float A[N]; +float B[N]; + +void simple_vec_stride_x(void) { + int i; + + for (i = 0; i < 4; i++) + B[2 * i] 
= A[2 * i]; +} +int main() +{ + simple_vec_stride_x(); + return A[42]; +} diff --git a/polly/test/CodeGen/simple_vec_stride_x.ll b/polly/test/CodeGen/simple_vec_stride_x.ll new file mode 100644 index 00000000000..1a927251875 --- /dev/null +++ b/polly/test/CodeGen/simple_vec_stride_x.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +; ModuleID = 'simple_vec_stride_x.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_stride_x() nounwind { +bb: + br label %bb2 + +bb2: ; preds = %bb5, %bb + %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ] + %tmp = mul i64 %indvar, 2 + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %tmp + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %tmp + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %bb3, label %bb6 + +bb3: ; preds = %bb2 + %tmp4 = load float* %scevgep1, align 8 + store float %tmp4, float* %scevgep, align 8 + br label %bb5 + +bb5: ; preds = %bb3 + %indvar.next = add i64 %indvar, 1 + br label %bb2 + +bb6: ; preds = %bb2 + ret void +} + +define i32 @main() nounwind { +bb: + call void @simple_vec_stride_x() + %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %tmp1 = fptosi float %tmp to i32 + ret i32 %tmp1 +} + +; CHECK: load float* %p_scevgep1.moved.to.bb3 +; CHECK: insertelement <4 x float> undef, float %tmp4_p_scalar_, i32 0 +; CHECK: load float* %p_scevgep1.moved.to.bb31 +; CHECK: insertelement <4 x float> %tmp4_p_vec_, float %tmp4_p_scalar_7, i32 1 +; CHECK: load float* %p_scevgep1.moved.to.bb32 +; CHECK: insertelement <4 x float> %tmp4_p_vec_8, float %tmp4_p_scalar_9, i32 2 +; 
CHECK: load float* %p_scevgep1.moved.to.bb33 +; CHECK: insertelement <4 x float> %tmp4_p_vec_10, float %tmp4_p_scalar_11, i32 3 +; CHECK: extractelement <4 x float> %tmp4_p_vec_12, i32 0 +; CHECK: store float %0, float* %p_scevgep.moved.to.bb3 +; CHECK: extractelement <4 x float> %tmp4_p_vec_12, i32 1 +; CHECK: store float %1, float* %p_scevgep.moved.to.bb34 +; CHECK: extractelement <4 x float> %tmp4_p_vec_12, i32 2 +; CHECK: store float %2, float* %p_scevgep.moved.to.bb35 +; CHECK: extractelement <4 x float> %tmp4_p_vec_12, i32 3 +; CHECK: store float %3, float* %p_scevgep.moved.to.bb36 + + diff --git a/polly/test/CodeGen/simple_vec_two_stmts.ll b/polly/test/CodeGen/simple_vec_two_stmts.ll new file mode 100644 index 00000000000..3e27e7df14d --- /dev/null +++ b/polly/test/CodeGen/simple_vec_two_stmts.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [1024 x float] zeroinitializer, align 16 +@B = common global [1024 x float] zeroinitializer, align 16 +@C = common global [1024 x float] zeroinitializer, align 16 + +define void @simple_vec_stride_one() nounwind { +bb0: + br label %bb1 + +bb1: + %indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb0 ] + %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar + %scevgep2 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar + %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %bb2a, label %bb5 + +bb2a: + %tmp1 = load float* %scevgep1, align 4 + store float %tmp1, float* %scevgep, align 4 + br label %bb2b + +bb2b: + %tmp2 = load float* %scevgep1, align 4 + store float %tmp2, float* %scevgep2, align 4 + br label %bb4 + +bb4: + %indvar.next = add i64 
%indvar, 1 + br label %bb1 + +bb5: + ret void +} + +define i32 @main() nounwind { + call void @simple_vec_stride_one() + %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8 + %2 = fptosi float %1 to i32 + ret i32 %2 +} + +; CHECK: bitcast float* {{.*}} to <4 x float>* +; CHECK: load <4 x float>* +; CHECK: store <4 x float> %tmp1_p_vec_full, <4 x float>* %vector_ptr7 +; CHECK: bitcast float* {{.*}} to <4 x float>* +; CHECK: load <4 x float>* +; CHECK: store <4 x float> %tmp2_p_vec_full, <4 x float>* %vector_ptr15 + diff --git a/polly/test/CodeGen/single_do_loop_int_max_iterations.c b/polly/test/CodeGen/single_do_loop_int_max_iterations.c new file mode 100644 index 00000000000..3cd17058b0a --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_int_max_iterations.c @@ -0,0 +1,34 @@ +#define N 20 +#include "limits.h" +#include <stdio.h> +volatile int A[N]; + +void single_do_loop_int_max_iterations() { + int i; + + __sync_synchronize(); + + i = 0; + + do { + A[0] = i; + ++i; + } while (i < INT_MAX); + + __sync_synchronize(); +} + +int main () { + int i; + + A[0] = 0; + + single_do_loop_int_max_iterations(); + + fprintf(stdout, "Output %d\n", A[0]); + + if (A[0] == INT_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_do_loop_int_max_iterations.ll b/polly/test/CodeGen/single_do_loop_int_max_iterations.ll new file mode 100644 index 00000000000..1bbfc1a6d7e --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_int_max_iterations.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze -S < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -O3 < %s | lli +; RUN: opt %loadPolly %defaultOpts -polly-codegen -O3 < %s | lli +; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%d -polly-codegen < %s | lli +; ModuleID = 'single_do_loop_int_max_iterations.s' +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [20 x i32] zeroinitializer, align 4 ; <[20 x i32]*> [#uses=1] +@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1] +@.str = private constant [11 x i8] c"Output %d\0A\00" ; <[11 x i8]*> [#uses=1] + +define void @single_do_loop_int_max_iterations() nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %do.body + +do.body: ; preds = %do.cond, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %do.cond ] ; <i32> [#uses=2] + volatile store i32 %0, i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) + %inc = add nsw i32 %0, 1 ; <i32> [#uses=2] + br label %do.cond + +do.cond: ; preds = %do.body + %exitcond = icmp ne i32 %inc, 2147483647 ; <i1> [#uses=1] + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +entry: + volatile store i32 0, i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) + call void @single_do_loop_int_max_iterations() + %tmp = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1] + %tmp1 = volatile load i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1] + %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 %tmp1) ; <i32> [#uses=0] + 
%tmp2 = volatile load i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp2, 2147483646 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %return + +if.else: ; preds = %entry + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) +; CHECK:for (c2=0;c2<=2147483646;c2++) { diff --git a/polly/test/CodeGen/single_do_loop_int_max_iterations___%do.body---%do.end.jscop b/polly/test/CodeGen/single_do_loop_int_max_iterations___%do.body---%do.end.jscop new file mode 100644 index 00000000000..fcd78fcc892 --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_int_max_iterations___%do.body---%do.end.jscop @@ -0,0 +1,13 @@ +{ + "name": "do.body => do.end", + "context": "{ [] }", + "statements": [{ + "name": "Stmt_do_body", + "domain": "{ Stmt_do_body[i0] : i0 >= 0 and i0 <= 2147483646 }", + "schedule": "{ Stmt_do_body[i0] -> scattering[0, o1, i0, o3, 0] : 64o3 = o1 and o1 <= i0 and o1 >= -63 + i0 }", + "accesses": [{ + "kind": "write", + "relation": "{ Stmt_do_body[i0] -> MemRef_A[0] }" + }] + }] +}
\ No newline at end of file diff --git a/polly/test/CodeGen/single_do_loop_int_param_iterations.c b/polly/test/CodeGen/single_do_loop_int_param_iterations.c new file mode 100644 index 00000000000..06e5bf6737a --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_int_param_iterations.c @@ -0,0 +1,25 @@ +#define N 20 +#include "limits.h" +volatile int A[N]; + +void bar (int n) { + int i; + __sync_synchronize(); + i = 0; + + do { + A[0] = i; + ++i; + } while (i < 2 * n); + __sync_synchronize(); +} + +int main () { + A[0] = 0; + bar (N/2); + + if (A[0] == N - 1 ) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_do_loop_int_param_iterations.ll b/polly/test/CodeGen/single_do_loop_int_param_iterations.ll new file mode 100644 index 00000000000..b185487ff0a --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_int_param_iterations.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog-scop -S -analyze < %s | FileCheck %s +; XFAIL: * +; ModuleID = 'single_do_loop_int_param_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [20 x i32] zeroinitializer, align 4 ; <[20 x i32]*> [#uses=1] + +define void @bar(i32 %n) nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %tmp = mul i32 %n, 2 ; <i32> [#uses=2] + %tmp1 = icmp sgt i32 %tmp, 1 ; <i1> [#uses=1] + %smax = select i1 %tmp1, i32 %tmp, i32 1 ; <i32> [#uses=1] + br label %do.body + +do.body: ; preds = %do.cond, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %do.cond ] ; <i32> [#uses=2] + volatile store i32 %0, i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) + %inc = add nsw i32 %0, 1 ; <i32> [#uses=2] + br label %do.cond + +do.cond: ; preds = %do.body + %exitcond = icmp ne i32 %inc, %smax ; <i1> [#uses=1] + br i1 %exitcond, label %do.body, label 
%do.end + +do.end: ; preds = %do.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +entry: + volatile store i32 0, i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) + call void @bar(i32 10) + %tmp = volatile load i32* getelementptr inbounds ([20 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp, 19 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %return + +if.else: ; preds = %entry + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} +; CHECK: Scop: do.body => do.end + diff --git a/polly/test/CodeGen/single_do_loop_ll_max_iterations.c b/polly/test/CodeGen/single_do_loop_ll_max_iterations.c new file mode 100644 index 00000000000..8d81b22bbb3 --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_ll_max_iterations.c @@ -0,0 +1,25 @@ +#define N 20 +#include "limits.h" +volatile long long A[N]; + +int main () { + long long i; + + A[0] = 0; + + __sync_synchronize(); + + i = 0; + + do { + A[0] = i; + ++i; + } while (i < LLONG_MAX); + + __sync_synchronize(); + + if (A[0] == LLONG_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_do_loop_ll_max_iterations.ll b/polly/test/CodeGen/single_do_loop_ll_max_iterations.ll new file mode 100644 index 00000000000..4722747edfd --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_ll_max_iterations.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze -S < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen -O3 < %s +; ModuleID = 'single_do_loop_ll_max_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target 
triple = "x86_64-unknown-linux-gnu" + +@A = common global [20 x i64] zeroinitializer, align 8 ; <[20 x i64]*> [#uses=1] + +define i32 @main() nounwind { +entry: + volatile store i64 0, i64* getelementptr inbounds ([20 x i64]* @A, i32 0, i32 0) + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %do.body + +do.body: ; preds = %do.cond, %entry + %0 = phi i64 [ 0, %entry ], [ %inc, %do.cond ] ; <i64> [#uses=2] + volatile store i64 %0, i64* getelementptr inbounds ([20 x i64]* @A, i32 0, i32 0) + %inc = add nsw i64 %0, 1 ; <i64> [#uses=2] + br label %do.cond + +do.cond: ; preds = %do.body + %exitcond = icmp ne i64 %inc, 9223372036854775807 ; <i1> [#uses=1] + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %tmp3 = volatile load i64* getelementptr inbounds ([20 x i64]* @A, i32 0, i32 0) ; <i64> [#uses=1] + %cmp4 = icmp eq i64 %tmp3, 9223372036854775806 ; <i1> [#uses=1] + br i1 %cmp4, label %if.then, label %if.else + +if.then: ; preds = %do.end + br label %return + +if.else: ; preds = %do.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK:for (c2=0;c2<=9223372036854775806;c2++) { + diff --git a/polly/test/CodeGen/single_do_loop_one_iteration.c b/polly/test/CodeGen/single_do_loop_one_iteration.c new file mode 100644 index 00000000000..df9c99d8f90 --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_one_iteration.c @@ -0,0 +1,25 @@ +#define N 20 +#include "limits.h" + +int main () { + int i; + int A[N]; + + A[0] = 1; + + __sync_synchronize(); + + i = 0; + + do { + A[0] = 0; + ++i; + } while (i < 1); + + __sync_synchronize(); + + if (A[0] == 0) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_do_loop_one_iteration.ll 
b/polly/test/CodeGen/single_do_loop_one_iteration.ll new file mode 100644 index 00000000000..bc55b10ee89 --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_one_iteration.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog-scop -S -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen -O3 < %s | lli +; XFAIL: * +; ModuleID = 'single_do_loop_one_iteration.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; <i32*> [#uses=1] + store i32 1, i32* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %do.body + +do.body: ; preds = %do.cond, %entry + %arraydecay1 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx2 = getelementptr inbounds i32* %arraydecay1, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx2 + br label %do.cond + +do.cond: ; preds = %do.body + br i1 false, label %do.body, label %do.end + +do.end: ; preds = %do.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay4 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx5 = getelementptr inbounds i32* %arraydecay4, i64 0 ; <i32*> [#uses=1] + %tmp6 = load i32* %arrayidx5 ; <i32> [#uses=1] + %cmp7 = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] + br i1 %cmp7, label %if.then, label %if.else + +if.then: ; preds = %do.end + br label %return + +if.else: ; preds = %do.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 
+} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK: S0(0) diff --git a/polly/test/CodeGen/single_do_loop_scev_replace.c b/polly/test/CodeGen/single_do_loop_scev_replace.c new file mode 100644 index 00000000000..5f2e192beea --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_scev_replace.c @@ -0,0 +1,32 @@ +#define N 20 +#include "limits.h" +#include <stdio.h> +volatile int A[2 * N]; + +void single_do_loop_scev_replace() { + int i; + + __sync_synchronize(); + + i = 0; + + do { + A[2 * i] = i; + ++i; + } while (i < N); + + __sync_synchronize(); +} + +int main () { + int i; + + single_do_loop_scev_replace(); + + fprintf(stdout, "Output %d\n", A[0]); + + if (A[2 * N - 2] == N - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_do_loop_scev_replace.ll b/polly/test/CodeGen/single_do_loop_scev_replace.ll new file mode 100644 index 00000000000..0737e8c87d1 --- /dev/null +++ b/polly/test/CodeGen/single_do_loop_scev_replace.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts < %s | lli +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli +; ModuleID = 'single_do_loop_scev_replace.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [40 x i32] zeroinitializer, align 4 ; <[40 x i32]*> [#uses=3] +@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1] +@.str = private constant [11 x i8] c"Output %d\0A\00" ; <[11 x i8]*> [#uses=1] + +define void 
@single_do_loop_scev_replace() nounwind { +entry: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %do.body + +do.body: ; preds = %do.cond, %entry + %indvar = phi i64 [ %indvar.next, %do.cond ], [ 0, %entry ] ; <i64> [#uses=3] + %tmp = mul i64 %indvar, 2 ; <i64> [#uses=1] + %arrayidx = getelementptr [40 x i32]* @A, i64 0, i64 %tmp ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + br label %do.cond + +do.cond: ; preds = %do.body + volatile store i32 %i.0, i32* %arrayidx + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp ne i64 %indvar.next, 20 ; <i1> [#uses=1] + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +entry: + call void @single_do_loop_scev_replace() + %tmp = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1] + %tmp1 = volatile load i32* getelementptr inbounds ([40 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1] + %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 %tmp1) ; <i32> [#uses=0] + %tmp2 = volatile load i32* getelementptr inbounds ([40 x i32]* @A, i32 0, i64 38) ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp2, 19 ; <i1> [#uses=1] + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %return + +if.else: ; preds = %entry + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) 
+ +; CHECK: for (c2=0;c2<=19;c2++) { +; CHECK: Stmt_do_cond(c2); +; CHECK: } + diff --git a/polly/test/CodeGen/single_loop.c b/polly/test/CodeGen/single_loop.c new file mode 100644 index 00000000000..2e175e594bc --- /dev/null +++ b/polly/test/CodeGen/single_loop.c @@ -0,0 +1,20 @@ +#include <string.h> +#define N 1024 + +int main () { + int i; + int A[N]; + + memset(A, 0, sizeof(int) * N); + + for (i = 0; i < N; i++) { + A[i] = 1; + } + + for (i = 0; i < N; i++) + if (A[i] != 1) + return 1; + + return 0; +} + diff --git a/polly/test/CodeGen/single_loop.ll b/polly/test/CodeGen/single_loop.ll new file mode 100644 index 00000000000..3cd4329fff7 --- /dev/null +++ b/polly/test/CodeGen/single_loop.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-codegen < %s | lli - +; ModuleID = 'single_loop.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [1024 x i32], align 4 ; <[1024 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [1024 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %conv = bitcast i32* %arraydecay to i8* ; <i8*> [#uses=1] + call void @llvm.memset.p0i8.i64(i8* %conv, i8 0, i64 4096, i32 1, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] ; <i64> [#uses=3] + %arrayidx = getelementptr [1024 x i32]* %A, i64 0, i64 %indvar1 ; <i32*> [#uses=1] + %exitcond = icmp ne i64 %indvar1, 1024 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 1, i32* %arrayidx + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + br 
label %for.cond5 + +for.cond5: ; preds = %for.inc17, %for.end + %indvar = phi i64 [ %indvar.next, %for.inc17 ], [ 0, %for.end ] ; <i64> [#uses=3] + %arrayidx13 = getelementptr [1024 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=1] + %i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %cmp7 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1] + br i1 %cmp7, label %for.body9, label %for.end20 + +for.body9: ; preds = %for.cond5 + %tmp14 = load i32* %arrayidx13 ; <i32> [#uses=1] + %cmp15 = icmp ne i32 %tmp14, 1 ; <i1> [#uses=1] + br i1 %cmp15, label %if.then, label %if.end + +if.then: ; preds = %for.body9 + br label %return + +if.end: ; preds = %for.body9 + br label %for.inc17 + +for.inc17: ; preds = %if.end + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond5 + +for.end20: ; preds = %for.cond5 + br label %return + +return: ; preds = %for.end20, %if.then + %retval.0 = phi i32 [ 1, %if.then ], [ 0, %for.end20 ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK:for (c2=0;c2<=1023;c2++) { +; CHECK: Stmt_for_body(c2); +; CHECK:} diff --git a/polly/test/CodeGen/single_loop_int_max_iterations.c b/polly/test/CodeGen/single_loop_int_max_iterations.c new file mode 100644 index 00000000000..d102fbf5dd2 --- /dev/null +++ b/polly/test/CodeGen/single_loop_int_max_iterations.c @@ -0,0 +1,21 @@ +#define N 20 +#include "limits.h" + +int main () { + int i; + int A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < INT_MAX; i++) + A[0] = i; + + __sync_synchronize(); + + if (A[0] == INT_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_int_max_iterations.ll b/polly/test/CodeGen/single_loop_int_max_iterations.ll new file mode 100644 index 00000000000..88f2611cce1 --- /dev/null +++ b/polly/test/CodeGen/single_loop_int_max_iterations.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze -S < %s | FileCheck %s +; RUN: opt 
%loadPolly %defaultOpts -polly-codegen -O3 < %s | lli + +; ModuleID = 'single_loop_int_max_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; <i32> [#uses=3] + %exitcond = icmp ne i32 %0, 2147483647 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arraydecay2 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx3 = getelementptr inbounds i32* %arraydecay2, i64 0 ; <i32*> [#uses=1] + store i32 %0, i32* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx6 = getelementptr inbounds i32* %arraydecay5, i64 0 ; <i32*> [#uses=1] + %tmp7 = load i32* %arrayidx6 ; <i32> [#uses=1] + %cmp8 = icmp eq i32 %tmp7, 2147483646 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, 
i1, i1) nounwind + +; CHECK:for (c2=0;c2<=2147483646;c2++) { diff --git a/polly/test/CodeGen/single_loop_ll_max_iterations.c b/polly/test/CodeGen/single_loop_ll_max_iterations.c new file mode 100644 index 00000000000..f5f56073e59 --- /dev/null +++ b/polly/test/CodeGen/single_loop_ll_max_iterations.c @@ -0,0 +1,21 @@ +#include "limits.h" +#define N 20 + +int main () { + long long i; + long long A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < LLONG_MAX; i++) + A[0] = i; + + __sync_synchronize(); + + if (A[0] == LLONG_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_ll_max_iterations.ll b/polly/test/CodeGen/single_loop_ll_max_iterations.ll new file mode 100644 index 00000000000..9c8d24f349b --- /dev/null +++ b/polly/test/CodeGen/single_loop_ll_max_iterations.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze -S < %s | FileCheck %s + +; ModuleID = 'single_loop_ll_max_iterations.s' +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i64], align 8 ; <[20 x i64]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx = getelementptr inbounds i64* %arraydecay, i64 0 ; <i64*> [#uses=1] + store i64 0, i64* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] ; <i64> [#uses=3] + %exitcond = icmp ne i64 %0, 9223372036854775807 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arraydecay2 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx3 = getelementptr inbounds i64* %arraydecay2, i64 0 ; <i64*> 
[#uses=1] + store i64 %0, i64* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i64 %0, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx6 = getelementptr inbounds i64* %arraydecay5, i64 0 ; <i64*> [#uses=1] + %tmp7 = load i64* %arrayidx6 ; <i64> [#uses=1] + %cmp8 = icmp eq i64 %tmp7, 9223372036854775806 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +; CHECK:for (c2=0;c2<=9223372036854775806;c2++) { diff --git a/polly/test/CodeGen/single_loop_one_iteration.c b/polly/test/CodeGen/single_loop_one_iteration.c new file mode 100644 index 00000000000..5540c3a811e --- /dev/null +++ b/polly/test/CodeGen/single_loop_one_iteration.c @@ -0,0 +1,20 @@ +#define N 20 + +int main () { + int i; + int A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < 1; i++) + A[i] = 1; + + __sync_synchronize(); + + if (A[0] == 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_one_iteration.ll b/polly/test/CodeGen/single_loop_one_iteration.ll new file mode 100644 index 00000000000..887d4b99239 --- /dev/null +++ b/polly/test/CodeGen/single_loop_one_iteration.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s + +; ModuleID = 'single_loop_one_iteration.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + 
+define i32 @main() nounwind { +entry: + %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] ; <i64> [#uses=3] + %arrayidx3 = getelementptr [20 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=1] + %exitcond = icmp ne i64 %indvar, 1 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 1, i32* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx6 = getelementptr inbounds i32* %arraydecay5, i64 0 ; <i32*> [#uses=1] + %tmp7 = load i32* %arrayidx6 ; <i32> [#uses=1] + %cmp8 = icmp eq i32 %tmp7, 1 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +; CHECK: Stmt_for_body(0); diff --git a/polly/test/CodeGen/single_loop_param.ll b/polly/test/CodeGen/single_loop_param.ll new file mode 100644 index 00000000000..7c66241372a --- /dev/null +++ b/polly/test/CodeGen/single_loop_param.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze < %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts 
-polly-codegen < %s | lli - +; ModuleID = 'single_loop_param.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=3] + +define void @bar(i64 %n) nounwind { +bb: + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %bb1 + +bb1: ; preds = %bb3, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp, %bb3 ] ; <i64> [#uses=3] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %i.0 ; <i32*> [#uses=1] + %exitcond = icmp ne i64 %i.0, %n ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb4 + +bb2: ; preds = %bb1 + store i32 1, i32* %scevgep + br label %bb3 + +bb3: ; preds = %bb2 + %tmp = add nsw i64 %i.0, 1 ; <i64> [#uses=1] + br label %bb1 + +bb4: ; preds = %bb1 + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + ret void +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +define i32 @main() nounwind { +bb: + call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false) + call void @bar(i64 1024) + br label %bb1 + +bb1: ; preds = %bb7, %bb + %indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=1] + %i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1] + %tmp = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1] + br i1 %tmp, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp3 = load i32* %scevgep ; <i32> [#uses=1] + %tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1] + br i1 %tmp4, label %bb5, label %bb6 + +bb5: ; preds = %bb2 + br label %bb9 + +bb6: ; preds = %bb2 + br label %bb7 + +bb7: ; preds = %bb6 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %bb1 + +bb8: ; preds = %bb1 + br label %bb9 + +bb9: ; preds = %bb8, %bb5 
+ %.0 = phi i32 [ 1, %bb5 ], [ 0, %bb8 ] ; <i32> [#uses=1] + ret i32 %.0 +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; CHECK: if (M >= 1) { +; CHECK: for (c2=0;c2<=M-1;c2++) { +; CHECK: Stmt_bb2(c2); +; CHECK: } +; CHECK: } + diff --git a/polly/test/CodeGen/single_loop_uint_max_iterations.c b/polly/test/CodeGen/single_loop_uint_max_iterations.c new file mode 100644 index 00000000000..86658b65bae --- /dev/null +++ b/polly/test/CodeGen/single_loop_uint_max_iterations.c @@ -0,0 +1,21 @@ +#include "limits.h" +#define N 20 + +int main () { + unsigned int i; + unsigned int A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < UINT_MAX; i++) + A[0] = i; + + __sync_synchronize(); + + if (A[0] == UINT_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_uint_max_iterations.ll b/polly/test/CodeGen/single_loop_uint_max_iterations.ll new file mode 100644 index 00000000000..bd6d41e1398 --- /dev/null +++ b/polly/test/CodeGen/single_loop_uint_max_iterations.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog-scop -S -analyze < %s | FileCheck %s +; XFAIL: * +; ModuleID = 'single_loop_uint_max_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; <i32> 
[#uses=3] + %exitcond = icmp ne i32 %0, -1 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arraydecay2 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx3 = getelementptr inbounds i32* %arraydecay2, i64 0 ; <i32*> [#uses=1] + store i32 %0, i32* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %0, 1 ; <i32> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx6 = getelementptr inbounds i32* %arraydecay5, i64 0 ; <i32*> [#uses=1] + %tmp7 = load i32* %arrayidx6 ; <i32> [#uses=1] + %cmp8 = icmp eq i32 %tmp7, -2 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind + +; CHECK:for (c2=0; diff --git a/polly/test/CodeGen/single_loop_ull_max_iterations.c b/polly/test/CodeGen/single_loop_ull_max_iterations.c new file mode 100644 index 00000000000..56d7677495a --- /dev/null +++ b/polly/test/CodeGen/single_loop_ull_max_iterations.c @@ -0,0 +1,21 @@ +#include "limits.h" +#define N 20 + +int main () { + unsigned long long i; + unsigned long long A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < ULLONG_MAX; i++) + A[0] = i; + + __sync_synchronize(); + + if (A[0] == ULLONG_MAX - 1) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_ull_max_iterations.ll b/polly/test/CodeGen/single_loop_ull_max_iterations.ll new file mode 100644 index 00000000000..37312b98f6f --- /dev/null +++ b/polly/test/CodeGen/single_loop_ull_max_iterations.ll 
@@ -0,0 +1,51 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog-scop -S -analyze < %s | FileCheck %s +; XFAIL: * +; ModuleID = 'single_loop_ull_max_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i64], align 8 ; <[20 x i64]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx = getelementptr inbounds i64* %arraydecay, i64 0 ; <i64*> [#uses=1] + store i64 0, i64* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] ; <i64> [#uses=3] + %exitcond = icmp ne i64 %0, -1 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arraydecay2 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx3 = getelementptr inbounds i64* %arraydecay2, i64 0 ; <i64*> [#uses=1] + store i64 %0, i64* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i64 %0, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; <i64*> [#uses=1] + %arrayidx6 = getelementptr inbounds i64* %arraydecay5, i64 0 ; <i64*> [#uses=1] + %tmp7 = load i64* %arrayidx6 ; <i64> [#uses=1] + %cmp8 = icmp eq i64 %tmp7, -2 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare 
void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK:for (c2=0; diff --git a/polly/test/CodeGen/single_loop_zero_iterations.c b/polly/test/CodeGen/single_loop_zero_iterations.c new file mode 100644 index 00000000000..87c55962c21 --- /dev/null +++ b/polly/test/CodeGen/single_loop_zero_iterations.c @@ -0,0 +1,20 @@ +#define N 20 + +int main () { + int i; + int A[N]; + + A[0] = 0; + + __sync_synchronize(); + + for (i = 0; i < 0; i++) + A[i] = 1; + + __sync_synchronize(); + + if (A[0] == 0) + return 0; + else + return 1; +} diff --git a/polly/test/CodeGen/single_loop_zero_iterations.ll b/polly/test/CodeGen/single_loop_zero_iterations.ll new file mode 100644 index 00000000000..0a489502297 --- /dev/null +++ b/polly/test/CodeGen/single_loop_zero_iterations.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze -S < %s | FileCheck %s + +; ModuleID = 'single_loop_zero_iterations.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() nounwind { +entry: + %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] + %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; <i32*> [#uses=1] + store i32 0, i32* %arrayidx + call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] ; <i64> [#uses=2] + %arrayidx3 = getelementptr [20 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=1] + br i1 false, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 1, i32* %arrayidx3 + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond 
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false) + %arraydecay5 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1] + %arrayidx6 = getelementptr inbounds i32* %arraydecay5, i64 0 ; <i32*> [#uses=1] + %tmp7 = load i32* %arrayidx6 ; <i32> [#uses=1] + %cmp8 = icmp eq i32 %tmp7, 0 ; <i1> [#uses=1] + br i1 %cmp8, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %return + +if.else: ; preds = %for.end + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; <i32> [#uses=1] + ret i32 %retval.0 +} + +declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind +; CHECK: for region: 'for.cond => for.end.region' in function 'main': +; CHECK-NEXT: main(): +; CHECK-NEXT: Stmt_for_body(0); diff --git a/polly/test/CodeGen/test.c b/polly/test/CodeGen/test.c new file mode 100644 index 00000000000..503e5f202df --- /dev/null +++ b/polly/test/CodeGen/test.c @@ -0,0 +1,19 @@ +int bar1(); +int bar2(); +int bar3(); +int k; +#define N 100 +int A[N]; + +int foo (int z) { + int i, j; + + for (i = 0; i < N; i++) { + A[i] = i; + + for (j = 0; j < N * 2; j++) + A[i] = j * A[i]; + } + + return A[z]; +} diff --git a/polly/test/CodeGen/test.ll b/polly/test/CodeGen/test.ll new file mode 100644 index 00000000000..812a8c2d812 --- /dev/null +++ b/polly/test/CodeGen/test.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly %defaultOpts -O3 -polly-cloog -analyze -S < %s | FileCheck %s +; XFAIL: * +; ModuleID = 'test.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@A = common global [100 x i32] zeroinitializer, align 4 ; <[100 x i32]*> [#uses=2] +@k = common global i32 0, align 4 ; <i32*> [#uses=0] + +define i32 @foo(i32 %z) nounwind { +bb.nph31.split.us: + br label %bb.nph.us + +for.inc16.us: ; preds = %for.body6.us 
+ store i32 %mul.us, i32* %arrayidx.us + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond32 = icmp eq i64 %indvar.next, 100 ; <i1> [#uses=1] + br i1 %exitcond32, label %for.end19, label %bb.nph.us + +for.body6.us: ; preds = %for.body6.us, %bb.nph.us + %arrayidx10.tmp.0.us = phi i32 [ %i.027.us, %bb.nph.us ], [ %mul.us, %for.body6.us ] ; <i32> [#uses=1] + %0 = phi i32 [ 0, %bb.nph.us ], [ %inc.us, %for.body6.us ] ; <i32> [#uses=2] + %mul.us = mul i32 %arrayidx10.tmp.0.us, %0 ; <i32> [#uses=2] + %inc.us = add nsw i32 %0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %inc.us, 200 ; <i1> [#uses=1] + br i1 %exitcond, label %for.inc16.us, label %for.body6.us + +bb.nph.us: ; preds = %bb.nph31.split.us, %for.inc16.us + %indvar = phi i64 [ %indvar.next, %for.inc16.us ], [ 0, %bb.nph31.split.us ] ; <i64> [#uses=3] + %arrayidx.us = getelementptr [100 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2] + %i.027.us = trunc i64 %indvar to i32 ; <i32> [#uses=2] + store i32 %i.027.us, i32* %arrayidx.us + br label %for.body6.us + +for.end19: ; preds = %for.inc16.us + %idxprom21 = sext i32 %z to i64 ; <i64> [#uses=1] + %arrayidx22 = getelementptr inbounds [100 x i32]* @A, i64 0, i64 %idxprom21 ; <i32*> [#uses=1] + %tmp23 = load i32* %arrayidx22 ; <i32> [#uses=1] + ret i32 %tmp23 +} +; CHECK: for (c2=0;c2<=99;c2++) { +; CHECK: S{{[0-4]}}(c2); +; CHECK: for (c4=0;c4<=199;c4++) { +; CHECK: S{{[0-4]}}(c2,c4); +; CHECK: } +; CHECK: S{{[0-4]}}(c2); +; CHECK: } + diff --git a/polly/test/Makefile b/polly/test/Makefile new file mode 100755 index 00000000000..4b32e66586c --- /dev/null +++ b/polly/test/Makefile @@ -0,0 +1,55 @@ +LEVEL := .. +include $(LEVEL)/Makefile.common + +# Test in all immediate subdirectories if unset. +ifdef TESTSUITE +TESTDIRS := $(TESTSUITE:%=$(PROJ_SRC_DIR)/%) +else +TESTDIRS ?= $(PROJ_SRC_DIR) +endif + +# 'lit' wants objdir paths, so it will pick up the lit.site.cfg. 
+TESTDIRS := $(TESTDIRS:$(PROJ_SRC_DIR)%=$(PROJ_OBJ_DIR)%) + +# Allow EXTRA_TESTDIRS to provide additional test directories. +TESTDIRS += $(EXTRA_TESTDIRS) + +ifndef TESTARGS +ifdef VERBOSE +TESTARGS = -v +else +TESTARGS = -s -v +endif +endif + +# Make sure any extra test suites can find the main site config. +LIT_ARGS := --param polly_site_config=${PROJ_OBJ_DIR}/lit.site.cfg \ + --param build_config=$(PROJ_OBJ_DIR) + + +ifdef VG + LIT_ARGS += "--vg" +endif + +polly-test:: lit.site.cfg + @ echo '--- Running polly tests for $(TARGET_TRIPLE) ---' + @ $(PYTHON) $(LLVM_SRC_ROOT)/utils/lit/lit.py \ + $(LIT_ARGS) $(TESTARGS) $(TESTDIRS) + +FORCE: + +lit.site.cfg: FORCE + @echo "Making Polly 'lit.site.cfg' file..." + @sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \ + -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \ + -e "s#@LLVM_TOOLS_DIR@#$(LLVM_OBJ_ROOT)/$(BuildMode)/bin#g" \ + -e "s#@LLVM_LIBS_DIR@#$(LibDir)#g" \ + -e "s#@POLLY_SOURCE_DIR@#$(PROJ_SRC_DIR)/..#g" \ + -e "s#@POLLY_BINARY_DIR@#$(PROJ_OBJ_DIR)/..#g" \ + -e "s#@TARGET_TRIPLE@#$(TARGET_TRIPLE)#g" \ + $(PROJ_SRC_DIR)/lit.site.cfg.in > $@ + +clean:: + @ find . -name Output | xargs rm -fr + +.PHONY: polly-test clean diff --git a/polly/test/README b/polly/test/README new file mode 100644 index 00000000000..e8470c6de2e --- /dev/null +++ b/polly/test/README @@ -0,0 +1 @@ +place tests here
\ No newline at end of file diff --git a/polly/test/ScopInfo/Alias-0.ll b/polly/test/ScopInfo/Alias-0.ll new file mode 100755 index 00000000000..0b3d495ab26 --- /dev/null +++ b/polly/test/ScopInfo/Alias-0.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s -stats 2>&1 | FileCheck %s + +; ModuleID = '/tmp/webcompile/_17966_0.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f(i32* nocapture %a, i32* nocapture %b) nounwind { +bb.nph: + %0 = tail call i32 (...)* @rnd() nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + %iftmp.0.0 = select i1 %1, i32* %b, i32* %a ; <i32*> [#uses=2] + br label %bb3 + +bb3: ; preds = %bb3, %bb.nph + %i.06 = phi i64 [ 0, %bb.nph ], [ %tmp, %bb3 ] ; <i64> [#uses=3] + %scevgep = getelementptr i32* %a, i64 %i.06 ; <i32*> [#uses=1] + %scevgep7 = getelementptr i32* %iftmp.0.0, i64 %i.06 ; <i32*> [#uses=1] + %tmp = add i64 %i.06, 1 ; <i64> [#uses=3] + %scevgep8 = getelementptr i32* %iftmp.0.0, i64 %tmp ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32* %scevgep8, align 4 ; <i32> [#uses=1] + %4 = shl i32 %3, 1 ; <i32> [#uses=1] + %5 = add nsw i32 %4, %2 ; <i32> [#uses=1] + store i32 %5, i32* %scevgep7, align 4 + %exitcond = icmp eq i64 %tmp, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb3 + +return: ; preds = %bb3 + ret void +} + +declare i32 @rnd(...) 
+ + +; CHECK: 1 polly-detect - Number of bad regions for Scop: Found base address alias diff --git a/polly/test/ScopInfo/Alias-1.ll b/polly/test/ScopInfo/Alias-1.ll new file mode 100755 index 00000000000..ab1219f12fc --- /dev/null +++ b/polly/test/ScopInfo/Alias-1.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s -stats 2>&1 | FileCheck %s + +; ModuleID = '/tmp/webcompile/_17966_0.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f(i32* nocapture %a, i32* nocapture %b) nounwind { +bb.nph: + %0 = tail call i32 (...)* @rnd() nounwind ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + %sel.b = getelementptr inbounds i32* %b, i64 4 + %iftmp.0.0 = select i1 %1, i32* %sel.b, i32* %a ; <i32*> [#uses=2] + br label %bb3 + +bb3: ; preds = %bb3, %bb.nph + %i.06 = phi i64 [ 0, %bb.nph ], [ %tmp, %bb3 ] ; <i64> [#uses=3] + %scevgep = getelementptr i32* %a, i64 %i.06 ; <i32*> [#uses=1] + %scevgep7 = getelementptr i32* %iftmp.0.0, i64 %i.06 ; <i32*> [#uses=1] + %tmp = add i64 %i.06, 1 ; <i64> [#uses=3] + %scevgep8 = getelementptr i32* %iftmp.0.0, i64 %tmp ; <i32*> [#uses=1] + %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32* %scevgep8, align 4 ; <i32> [#uses=1] + %4 = shl i32 %3, 1 ; <i32> [#uses=1] + %5 = add nsw i32 %4, %2 ; <i32> [#uses=1] + store i32 %5, i32* %scevgep7, align 4 + %exitcond = icmp eq i64 %tmp, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb3 + +return: ; preds = %bb3 + ret void +} + +declare i32 @rnd(...) 
+ + +; CHECK: 1 polly-detect - Number of bad regions for Scop: Found base address alias diff --git a/polly/test/ScopInfo/Alias-2.ll b/polly/test/ScopInfo/Alias-2.ll new file mode 100755 index 00000000000..443ad0ca52d --- /dev/null +++ b/polly/test/ScopInfo/Alias-2.ll @@ -0,0 +1,33 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s -stats 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f(i32** nocapture %ptrs, i64 %p0, i64 %p1, i64 %p2) nounwind { +bb.nph: + %0 = getelementptr inbounds i32** %ptrs, i64 %p0 ; <i32**> [#uses=1] + %1 = load i32** %0, align 8 ; <i32*> [#uses=1] + %2 = getelementptr inbounds i32** %ptrs, i64 %p1 ; <i32**> [#uses=1] + %3 = load i32** %2, align 8 ; <i32*> [#uses=1] + %4 = getelementptr inbounds i32** %ptrs, i64 %p2 ; <i32**> [#uses=1] + %5 = load i32** %4, align 8 ; <i32*> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i64 [ 0, %bb.nph ], [ %tmp, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i32* %3, i64 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %5, i64 %i.03 ; <i32*> [#uses=1] + %tmp = add i64 %i.03, 1 ; <i64> [#uses=3] + %scevgep5 = getelementptr i32* %1, i64 %tmp ; <i32*> [#uses=1] + %6 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %7 = load i32* %scevgep4, align 4 ; <i32> [#uses=1] + %8 = add nsw i32 %7, %6 ; <i32> [#uses=1] + store i32 %8, i32* %scevgep5, align 4 + %exitcond = icmp eq i64 %tmp, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + +; CHECK: 1 polly-detect - Number of bad regions for Scop: Found base address alias diff --git a/polly/test/ScopInfo/Alias-3.ll b/polly/test/ScopInfo/Alias-3.ll new file mode 100755 index 00000000000..0ea7c94f240 --- /dev/null +++ b/polly/test/ScopInfo/Alias-3.ll @@ -0,0 +1,27 @@ +; RUN: opt 
%loadPolly %defaultOpts -polly-analyze-ir -analyze %s -stats 2>&1 | FileCheck %s + +; ModuleID = '/tmp/webcompile/_22751_0.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f(i32* nocapture %a, i32* nocapture %b) nounwind { +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i64 [ 0, %bb.nph ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i32* %b, i64 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %a, i64 %i.03 ; <i32*> [#uses=1] + %0 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %1 = add nsw i32 %0, 2 ; <i32> [#uses=1] + store i32 %1, i32* %scevgep4, align 4 + %2 = add nsw i64 %i.03, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, 128 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + + +; CHECK: 1 polly-detect - Number of bad regions for Scop: Found base address alias diff --git a/polly/test/ScopInfo/Alias-4.ll b/polly/test/ScopInfo/Alias-4.ll new file mode 100755 index 00000000000..9d594ac3a10 --- /dev/null +++ b/polly/test/ScopInfo/Alias-4.ll @@ -0,0 +1,30 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s -stats 2>&1 | not FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s -check-prefix=MAS + +; ModuleID = '/tmp/webcompile/_22751_0.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f(i32* noalias nocapture %a, i32* noalias nocapture %b) nounwind { +bb.nph: + br label %bb + +bb: ; preds = %bb, %bb.nph + %i.03 = phi i64 [ 0, %bb.nph ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i32* %b, i64 %i.03 ; <i32*> [#uses=1] + %scevgep4 = getelementptr i32* %a, i64 %i.03 
; <i32*> [#uses=1] + %0 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %1 = add nsw i32 %0, 2 ; <i32> [#uses=1] + store i32 %1, i32* %scevgep4, align 4 + %2 = add nsw i64 %i.03, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, 128 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + + +; CHECK: Found base address alias +; MAS: Reads %b[4 * {0,+,1}<nuw><nsw><%bb> + 0] Refs: Must alias {%b, } May alias {}, +; MAS: Writes %a[4 * {0,+,1}<nuw><nsw><%bb> + 0] Refs: Must alias {%a, } May alias {}, diff --git a/polly/test/ScopInfo/bad_loop_0.ll b/polly/test/ScopInfo/bad_loop_0.ll new file mode 100644 index 00000000000..bb00a11cd16 --- /dev/null +++ b/polly/test/ScopInfo/bad_loop_0.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | not FileCheck %s + +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < M; ++j) +; for (i = 0; i < rnd(); ++i) +; ... +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %M, 0 ; <i1> [#uses=1] + br i1 %0, label %bb2.preheader, label %return + +bb1: ; preds = %bb2.preheader, %bb1 + %i.06 = phi i64 [ 0, %bb2.preheader ], [ %1, %bb1 ] ; <i64> [#uses=3] + %scevgep = getelementptr [128 x i64]* %a, i64 %i.06, i64 %5 ; <i64*> [#uses=1] + %tmp = add i64 %i.06, %N ; <i64> [#uses=1] + store i64 %tmp, i64* %scevgep, align 8 + %1 = add nsw i64 %i.06, 1 ; <i64> [#uses=2] + %2 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=1] + %3 = icmp sgt i64 %2, %1 ; <i1> [#uses=1] + br i1 %3, label %bb1, label %bb3 + +bb3: ; preds = %bb2.preheader, %bb1 + %4 = add i64 %5, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %4, %M ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb2.preheader + 
+bb2.preheader: ; preds = %bb3, %entry + %5 = phi i64 [ %4, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2] + %6 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=1] + %7 = icmp sgt i64 %6, 0 ; <i1> [#uses=1] + br i1 %7, label %bb1, label %bb3 + +return: ; preds = %bb3, %entry + ret void +} + +declare i64 @rnd(...) + +; CHECK: Scop! diff --git a/polly/test/ScopInfo/bad_loop_1.ll b/polly/test/ScopInfo/bad_loop_1.ll new file mode 100644 index 00000000000..e5421e09c57 --- /dev/null +++ b/polly/test/ScopInfo/bad_loop_1.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s -check-prefix=INDVAR +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s + +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < rnd(); ++j) +; for (i = 0; i < N; ++i) +; a[i][j] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=1] + %1 = icmp sgt i64 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb.nph8, label %return + +bb.nph8: ; preds = %entry + %2 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %2, label %bb2.preheader.us, label %bb2.preheader + +bb2.preheader.us: ; preds = %bb2.bb3_crit_edge.us, %bb.nph8 + %3 = phi i64 [ 0, %bb.nph8 ], [ %tmp, %bb2.bb3_crit_edge.us ] ; <i64> [#uses=2] + %tmp = add i64 %3, 1 ; <i64> [#uses=2] + br label %bb1.us + +bb1.us: ; preds = %bb1.us, %bb2.preheader.us + %i.06.us = phi i64 [ 0, %bb2.preheader.us ], [ %4, %bb1.us ] ; <i64> [#uses=2] + %scevgep = getelementptr [128 x i64]* %a, i64 %i.06.us, i64 %3 ; <i64*> [#uses=1] + store i64 0, i64* %scevgep, align 8 + %4 = add nsw i64 %i.06.us, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %4, %N ; <i1> [#uses=1] + br i1 %exitcond, label 
%bb2.bb3_crit_edge.us, label %bb1.us + +bb2.bb3_crit_edge.us: ; preds = %bb1.us + %5 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=1] + %6 = icmp sgt i64 %5, %tmp ; <i1> [#uses=1] + br i1 %6, label %bb2.preheader.us, label %return + +bb2.preheader: ; preds = %bb2.preheader, %bb.nph8 + %j.07 = phi i64 [ %tmp9, %bb2.preheader ], [ 0, %bb.nph8 ] ; <i64> [#uses=1] + %tmp9 = add i64 %j.07, 1 ; <i64> [#uses=2] + %7 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=1] + %8 = icmp sgt i64 %7, %tmp9 ; <i1> [#uses=1] + br i1 %8, label %bb2.preheader, label %return + +return: ; preds = %bb2.preheader, %bb2.bb3_crit_edge.us, %entry + ret void +} + +declare i64 @rnd(...) + +; INDVAR: Scop: bb1.us => bb2.bb3_crit_edge.us Parameters: (%N, {0,+,1}<%bb2.preheader.us>, ), Max Loop Depth: 1 +; CHECK: Scop: bb1.us => bb2.bb3_crit_edge.us Parameters: (%N, {0,+,1}<%bb2.preheader.us>, ), Max Loop Depth: 1 diff --git a/polly/test/ScopInfo/bug_2010_07_16.ll b/polly/test/ScopInfo/bug_2010_07_16.ll new file mode 100755 index 00000000000..b01397562bb --- /dev/null +++ b/polly/test/ScopInfo/bug_2010_07_16.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s + +; ModuleID = '/home/ether/unexpected_parameter.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define void @mdct_sub48([2 x [576 x double]]* %mdct_freq) nounwind { +entry: + br label %bb54 + +bb4: ; preds = %bb54, %bb49 + br label %bb6 + +bb6: ; preds = %bb6, %bb4 + br i1 undef, label %bb6, label %bb48 + +bb24: ; preds = %bb48 + br i1 false, label %bb47, label %bb46 + +bb40: ; preds = %bb46 + %0 = load double* %scevgep74, align 8 ; <double> [#uses=0] + %indvar.next62 = add i64 %indvar61, 1 ; <i64> [#uses=1] + br label %bb46 + +bb46: ; preds = %bb40, %bb24 + %indvar61 = phi i64 [ %indvar.next62, %bb40 ], [ 0, %bb24 
] ; <i64> [#uses=1] + %scevgep74 = getelementptr [2 x [576 x double]]* %mdct_freq, i64 0, i64 %indvar1, i64 0 ; <double*> [#uses=1] + store double undef, double* %scevgep74, align 8 + br i1 false, label %bb40, label %bb47 + +bb47: ; preds = %bb46, %bb24 + br label %bb48 + +bb48: ; preds = %bb47, %bb6 + br i1 false, label %bb24, label %bb49 + +bb49: ; preds = %bb48 + br i1 undef, label %bb4, label %bb53 + +bb53: ; preds = %bb49 + %indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1] + br label %bb54 + +bb54: ; preds = %bb53, %entry + %indvar1 = phi i64 [ %indvar.next2, %bb53 ], [ 0, %entry ] ; <i64> [#uses=2] + br i1 undef, label %bb4, label %return + +return: ; preds = %bb54 + ret void +} + +; CHECK: Scop: bb24 => bb48.region Parameters: ({0,+,1}<%bb54>, ), Max Loop Depth: 1 diff --git a/polly/test/ScopInfo/bug_2010_10_22.ll b/polly/test/ScopInfo/bug_2010_10_22.ll new file mode 100755 index 00000000000..ce39fad8ce0 --- /dev/null +++ b/polly/test/ScopInfo/bug_2010_10_22.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir %s + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define fastcc void @_Z8wavModelR5Mixer() { +entry: + br label %bb230 + +bb230: ; preds = %bb233, %bb.nph433 + %indvar600 = phi i64 [ 0, %entry ], [ %tmp610, %bb233 ] + %tmp217 = add i64 %indvar600, -1 + %tmp204 = trunc i64 %tmp217 to i32 + %tmp205 = zext i32 %tmp204 to i64 + %tmp206 = add i64 %tmp205, 1 + %tmp610 = add i64 %indvar600, 1 + br i1 false, label %bb231.preheader, label %bb233 + +bb231.preheader: ; preds = %bb230 + br label %bb231 + +bb231: ; preds = %bb231, %bb231.preheader + %indvar589 = phi i64 [ %tmp611, %bb231 ], [ 0, %bb231.preheader ] + %tmp611 = add i64 %indvar589, 1 + %exitcond207 = icmp eq i64 %tmp611, %tmp206 + br i1 %exitcond207, label 
%bb233.loopexit, label %bb231 + +bb233.loopexit: ; preds = %bb231 + br label %bb233 + +bb233: ; preds = %bb233.loopexit, %bb230 + %exitcond213 = icmp eq i64 %tmp610, 0 + br i1 %exitcond213, label %bb241, label %bb230 + +bb241: ; preds = %bb233, %bb228 + br label %bb244.preheader + +bb244.preheader: ; preds = %bb241, %bb176 + br i1 undef, label %bb245, label %bb.nph416 + +bb.nph416: ; preds = %bb244.preheader + unreachable + +bb245: ; preds = %bb244.preheader + ret void +} diff --git a/polly/test/ScopInfo/bug_2011_1_5.ll b/polly/test/ScopInfo/bug_2011_1_5.ll new file mode 100755 index 00000000000..12b6cc375c7 --- /dev/null +++ b/polly/test/ScopInfo/bug_2011_1_5.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s + +; Bug description: Alias Analysis thinks IntToPtrInst aliases with alloca instructions created by IndependentBlocks Pass. +; This will trigger the assertion when we are verifying the SCoP after IndependentBlocks. + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct.precisionType = type { i16, i16, i16, i8, [1 x i16] } + +define void @main() nounwind { +entry: + br label %bb1.i198.i + +bb1.i198.i: ; preds = %bb.i197.i, %psetq.exit196.i + %tmp51.i = inttoptr i64 0 to %struct.precisionType* + br i1 undef, label %bb1.i210.i, label %bb.i209.i + +bb.i209.i: ; preds = %bb1.i198.i + br label %bb1.i210.i + +bb1.i210.i: ; preds = %bb.i209.i, %bb1.i198.i + %0 = icmp eq i64 0, 0 + br i1 %0, label %bb1.i216.i, label %bb.i215.i + +bb.i215.i: ; preds = %bb1.i210.i + %1 = getelementptr inbounds %struct.precisionType* %tmp51.i, i64 0, i32 0 + store i16 undef, i16* %1, align 2 + br label %bb1.i216.i + +bb1.i216.i: ; preds = %bb.i215.i, %bb1.i210.i + br i1 undef, label %psetq.exit220.i, label %bb2.i217.i + +bb2.i217.i: ; preds = 
%bb1.i216.i + br i1 undef, label %bb3.i218.i, label %psetq.exit220.i + +bb3.i218.i: ; preds = %bb2.i217.i + br label %psetq.exit220.i + +psetq.exit220.i: ; preds = %bb3.i218.i, %bb2.i217.i, %bb1.i216.i + br i1 undef, label %bb14.i76, label %bb15.i77 + +bb14.i76: ; preds = %psetq.exit220.i + unreachable + +bb15.i77: ; preds = %psetq.exit220.i + br i1 %0, label %psetq.exit238.i, label %bb2.i235.i + +bb2.i235.i: ; preds = %bb15.i77 + br i1 undef, label %bb3.i236.i, label %psetq.exit238.i + +bb3.i236.i: ; preds = %bb2.i235.i + unreachable + +psetq.exit238.i: ; preds = %bb2.i235.i, %bb15.i77 + unreachable + +bb56.i.loopexit: ; preds = %psetq.exit172.i + unreachable +} diff --git a/polly/test/ScopInfo/bug_scev_not_fully_eval.ll b/polly/test/ScopInfo/bug_scev_not_fully_eval.ll new file mode 100644 index 00000000000..84a570fb9ae --- /dev/null +++ b/polly/test/ScopInfo/bug_scev_not_fully_eval.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | not FileCheck %s +; ModuleID = '/home/ether/where_comes_the_indvar.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +@edge.8265 = external global [72 x i32], align 32 ; <[72 x i32]*> [#uses=1] + +define void @compact_unitcell_edges() nounwind { +bb.nph19: + br label %bb4 + +bb4: ; preds = %bb4, %bb.nph19 + %e.118 = phi i32 [ 0, %bb.nph19 ], [ %tmp23, %bb4 ] ; <i32> [#uses=1] + %i.017 = phi i32 [ 0, %bb.nph19 ], [ %0, %bb4 ] ; <i32> [#uses=1] + %tmp23 = add i32 %e.118, 8 ; <i32> [#uses=2] + %0 = add nsw i32 %i.017, 1 ; <i32> [#uses=2] + %exitcond42 = icmp eq i32 %0, 6 ; <i1> [#uses=1] + br i1 %exitcond42, label %bb.nph, label %bb4 + +bb.nph: ; preds = %bb4 + %tmp = sext i32 %tmp23 to i64 ; <i64> [#uses=1] + br label %bb7 + +bb7: ; preds = %bb7, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb7 ] ; <i64> [#uses=2] + %tmp21 = add 
i64 %tmp, %indvar ; <i64> [#uses=1] + %scevgep = getelementptr [72 x i32]* @edge.8265, i64 0, i64 %tmp21 ; <i32*> [#uses=1] + store i32 undef, i32* %scevgep, align 4 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1] + br i1 undef, label %bb10, label %bb7 + +bb10: ; preds = %bb7 + ret void +} + +; CHECK: SCOP: diff --git a/polly/test/ScopInfo/cast.ll b/polly/test/ScopInfo/cast.ll new file mode 100644 index 00000000000..f26e7c4677c --- /dev/null +++ b/polly/test/ScopInfo/cast.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +;void f(long a[], long N, long M) { +; long i, j, k; +; for (j = 0; j < M; ++j) +; ((long*)j)[(long)a] = j; + +; for (j = 0; j < N; ++j) +; a[j] = (char)(M + j); +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %M, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph8, label %bb4.loopexit + +bb.nph8: ; preds = %entry + %1 = ptrtoint i64* %a to i64 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph8 + %2 = phi i64 [ 0, %bb.nph8 ], [ %5, %bb ] ; <i64> [#uses=3] + %3 = inttoptr i64 %2 to i64* ; <i64*> [#uses=1] + %4 = getelementptr inbounds i64* %3, i64 %1 ; <i64*> [#uses=1] + store i64 %2, i64* %4, align 8 + %5 = add nsw i64 %2, 1 ; <i64> [#uses=2] + %exitcond10 = icmp eq i64 %5, %M ; <i1> [#uses=1] + br i1 %exitcond10, label %bb4.loopexit, label %bb + +bb3: ; preds = %bb4.loopexit, %bb3 + %j.16 = phi i64 [ 0, %bb4.loopexit ], [ %7, %bb3 ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %j.16 ; <i64*> [#uses=1] + %tmp = add i64 %j.16, %M ; <i64> [#uses=1] + %tmp9 = trunc i64 %tmp to i8 ; <i8> [#uses=1] + %6 = sext i8 %tmp9 to i64 ; <i64> [#uses=1] + store i64 %6, i64* 
%scevgep, align 8 + %7 = add nsw i64 %j.16, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %7, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb3 + +bb4.loopexit: ; preds = %bb, %entry + %8 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %8, label %bb3, label %return + +return: ; preds = %bb4.loopexit, %bb3 + ret void +} + +; CHECK: Scop: bb4.loopexit => return Parameters: (%N, ), Max Loop Depth: 1 diff --git a/polly/test/ScopInfo/cond_in_loop.ll b/polly/test/ScopInfo/cond_in_loop.ll new file mode 100644 index 00000000000..1ae2e29f1cc --- /dev/null +++ b/polly/test/ScopInfo/cond_in_loop.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | not FileCheck %s + +;void f(long a[], long N, long M) { +; long i, j, k; +; for (j = 0; j < M; ++j) +; if (N > j) +; a[j] = j; +; else { +; a[j] = M; +; a[j - N] = 0; +; } +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %M, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %return + +bb: ; preds = %bb3, %entry + %1 = phi i64 [ 0, %entry ], [ %3, %bb3 ] ; <i64> [#uses=5] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=2] + %2 = icmp slt i64 %1, %N ; <i1> [#uses=1] + br i1 %2, label %bb1, label %bb2 + +bb1: ; preds = %bb + store i64 %1, i64* %scevgep, align 8 + br label %bb3 + +bb2: ; preds = %bb + %tmp7 = sub i64 %1, %N ; <i64> [#uses=1] + %scevgep8 = getelementptr i64* %a, i64 %tmp7 ; <i64*> [#uses=1] + store i64 %M, i64* %scevgep, align 8 + store i64 0, i64* %scevgep8, align 8 + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + %3 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %3, %M ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb3, %entry + ret void +} + +; CHECK: Scop! 
diff --git a/polly/test/ScopInfo/indvar_out_of_loop.ll b/polly/test/ScopInfo/indvar_out_of_loop.ll new file mode 100644 index 00000000000..c5a6f5869b7 --- /dev/null +++ b/polly/test/ScopInfo/indvar_out_of_loop.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * + +;void f(long a[], long N) { +; long i; +; for (i = 0; i < N; ++i) +; a[i] = i; + +; a[2 *i + 5 ] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + %0 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %bb, %entry + %1 = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=1] + store i64 %1, i64* %scevgep, align 8 + %2 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb + %phitmp = shl i64 %N, 1 ; <i64> [#uses=1] + %phitmp5 = add i64 %phitmp, 5 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %i.0.lcssa = phi i64 [ %phitmp5, %bb1.bb2_crit_edge ], [ 5, %entry ] ; <i64> [#uses=1] + %3 = getelementptr inbounds i64* %a, i64 %i.0.lcssa ; <i64*> [#uses=1] + store i64 0, i64* %3, align 8 + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, ) diff --git a/polly/test/ScopInfo/indvar_out_of_loop_1.ll b/polly/test/ScopInfo/indvar_out_of_loop_1.ll new file mode 100644 index 00000000000..2ad2aa18385 --- /dev/null +++ b/polly/test/ScopInfo/indvar_out_of_loop_1.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly 
%defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(long a[], long N) { +; long i; +; for (i = 0; i < N; ++i) +; a[i] = i; + +; if (N > 0) +; a[2 * N + 5 ] = 0; +; else +; a[5] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + %0 = icmp sgt i64 %N, 0 ; <i1> [#uses=2] + br i1 %0, label %bb, label %bb4 + +bb: ; preds = %bb, %entry + %1 = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=1] + store i64 %1, i64* %scevgep, align 8 + %2 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb + br i1 %0, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + %3 = shl i64 %N, 1 ; <i64> [#uses=1] + %4 = add nsw i64 %3, 5 ; <i64> [#uses=1] + %5 = getelementptr inbounds i64* %a, i64 %4 ; <i64*> [#uses=1] + store i64 0, i64* %5, align 8 + ret void + +bb4: ; preds = %bb2, %entry + %6 = getelementptr inbounds i64* %a, i64 5 ; <i64*> [#uses=1] + store i64 0, i64* %6, align 8 + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, ) diff --git a/polly/test/ScopInfo/indvar_out_of_loop_2.ll b/polly/test/ScopInfo/indvar_out_of_loop_2.ll new file mode 100644 index 00000000000..4cb0cf48b2a --- /dev/null +++ b/polly/test/ScopInfo/indvar_out_of_loop_2.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * + +;void f(long a[], long N, long M) { +; long i; + +; if (N > M) +; for (i = 0; i < N; ++i) +; a[i] = i; +; else +; i = M + 3; + +; a[2 *i + 5 ] = 0; +;} + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + %0 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %bb, %entry + %1 = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=1] + store i64 %1, i64* %scevgep, align 8 + %2 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb + %phitmp = shl i64 %N, 1 ; <i64> [#uses=1] + %phitmp5 = add i64 %phitmp, 5 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %i.0.lcssa = phi i64 [ %phitmp5, %bb1.bb2_crit_edge ], [ 5, %entry ] ; <i64> [#uses=1] + %3 = getelementptr inbounds i64* %a, i64 %i.0.lcssa ; <i64*> [#uses=1] + store i64 0, i64* %3, align 8 + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, %M ) diff --git a/polly/test/ScopInfo/indvar_out_of_loop_3.ll b/polly/test/ScopInfo/indvar_out_of_loop_3.ll new file mode 100755 index 00000000000..7d4c4d23819 --- /dev/null +++ b/polly/test/ScopInfo/indvar_out_of_loop_3.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-analyze-ir -analyze %s | FileCheck %s + +;void f(long a[], long n, long m) { +; long i0, i1; +; for (i0 = 0; i0 < 2 * n + m; ++i0)//loop0 +; a[i0] = n; + +; for (i1 = 0; i1 < i0 + m; ++i1)//loop1 +; a[i1] += 2; +;} + + +; ModuleID = '/tmp/webcompile/_19162_0.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @_Z1fPlll(i64* nocapture %a, i64 %n, i64 %m) nounwind { +entry: + %0 = shl i64 %n, 1 ; <i64> [#uses=1] + %1 
= add nsw i64 %0, %m ; <i64> [#uses=3] + %2 = icmp sgt i64 %1, 0 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb4.preheader + +bb: ; preds = %bb, %entry + %i0.07 = phi i64 [ %3, %bb ], [ 0, %entry ] ; <i64> [#uses=2] + %scevgep11 = getelementptr i64* %a, i64 %i0.07 ; <i64*> [#uses=1] + store i64 %n, i64* %scevgep11, align 8 + %3 = add nsw i64 %i0.07, 1 ; <i64> [#uses=2] + %exitcond10 = icmp eq i64 %3, %1 ; <i1> [#uses=1] + br i1 %exitcond10, label %bb4.preheader, label %bb + +bb4.preheader: ; preds = %bb, %entry + %i0.0.lcssa = phi i64 [ 0, %entry ], [ %1, %bb ] ; <i64> [#uses=1] + %4 = add nsw i64 %i0.0.lcssa, %m ; <i64> [#uses=2] + %5 = icmp sgt i64 %4, 0 ; <i1> [#uses=1] + br i1 %5, label %bb3, label %return + +bb3: ; preds = %bb3, %bb4.preheader + %i1.06 = phi i64 [ %8, %bb3 ], [ 0, %bb4.preheader ] ; <i64> [#uses=2] + %scevgep = getelementptr i64* %a, i64 %i1.06 ; <i64*> [#uses=2] + %6 = load i64* %scevgep, align 8 ; <i64> [#uses=1] + %7 = add nsw i64 %6, 2 ; <i64> [#uses=1] + store i64 %7, i64* %scevgep, align 8 + %8 = add nsw i64 %i1.06, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %8, %4 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb3 + +return: ; preds = %bb3, %bb4.preheader + ret void +} + + +; CHECK: Scop: entry.split => bb4.preheader.region Parameters: (%m, %n, ), Max Loop diff --git a/polly/test/ScopInfo/loop_affine_bound_0.ll b/polly/test/ScopInfo/loop_affine_bound_0.ll new file mode 100644 index 00000000000..205cf25c928 --- /dev/null +++ b/polly/test/ScopInfo/loop_affine_bound_0.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * + +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < (4*N + 7*M +3); ++j) +; for (i = 0; i < (5*N + 2); ++i) +; ... 
+;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = shl i64 %N, 2 ; <i64> [#uses=2] + %1 = mul i64 %M, 7 ; <i64> [#uses=2] + %2 = or i64 %0, 3 ; <i64> [#uses=1] + %3 = add nsw i64 %2, %1 ; <i64> [#uses=1] + %4 = icmp sgt i64 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %bb.nph8, label %return + +bb1: ; preds = %bb2.preheader, %bb1 + %i.06 = phi i64 [ 0, %bb2.preheader ], [ %5, %bb1 ] ; <i64> [#uses=2] + %scevgep = getelementptr [128 x i64]* %a, i64 %i.06, i64 %10 ; <i64*> [#uses=1] + store i64 0, i64* %scevgep, align 8 + %5 = add nsw i64 %i.06, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %5, %8 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %6 = add i64 %10, 1 ; <i64> [#uses=2] + %exitcond14 = icmp eq i64 %6, %tmp13 ; <i1> [#uses=1] + br i1 %exitcond14, label %return, label %bb2.preheader + +bb.nph8: ; preds = %entry + %7 = mul i64 %N, 5 ; <i64> [#uses=1] + %8 = add nsw i64 %7, 2 ; <i64> [#uses=2] + %9 = icmp sgt i64 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb.nph8.split, label %return + +bb.nph8.split: ; preds = %bb.nph8 + %tmp12 = add i64 %1, %0 ; <i64> [#uses=1] + %tmp13 = add i64 %tmp12, 3 ; <i64> [#uses=1] + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph8.split, %bb3 + %10 = phi i64 [ 0, %bb.nph8.split ], [ %6, %bb3 ] ; <i64> [#uses=2] + br label %bb1 + +return: ; preds = %bb.nph8, %bb3, %entry + ret void +} + +; CHECK: entry => <Function Return> Parameters: (%N, %M, ) diff --git a/polly/test/ScopInfo/loop_affine_bound_1.ll b/polly/test/ScopInfo/loop_affine_bound_1.ll new file mode 100644 index 00000000000..f7567993ece --- /dev/null +++ b/polly/test/ScopInfo/loop_affine_bound_1.ll @@ -0,0 +1,54 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; 
RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < (4*N + 7*M +3); ++j) +; for (i = j; i < (5*N + 2); ++i) +; ... +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = shl i64 %N, 2 ; <i64> [#uses=2] + %1 = mul i64 %M, 7 ; <i64> [#uses=2] + %2 = or i64 %0, 3 ; <i64> [#uses=1] + %3 = add nsw i64 %2, %1 ; <i64> [#uses=1] + %4 = icmp sgt i64 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %bb.nph8, label %return + +bb1: ; preds = %bb2.preheader, %bb1 + %indvar = phi i64 [ 0, %bb2.preheader ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2] + %scevgep = getelementptr [128 x i64]* %a, i64 %indvar, i64 %tmp10 ; <i64*> [#uses=1] + store i64 0, i64* %scevgep, align 8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp9 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb2.preheader, %bb1 + %5 = add i64 %8, 1 ; <i64> [#uses=2] + %exitcond14 = icmp eq i64 %5, %tmp13 ; <i1> [#uses=1] + br i1 %exitcond14, label %return, label %bb2.preheader + +bb.nph8: ; preds = %entry + %6 = mul i64 %N, 5 ; <i64> [#uses=1] + %7 = add nsw i64 %6, 2 ; <i64> [#uses=2] + %tmp12 = add i64 %1, %0 ; <i64> [#uses=1] + %tmp13 = add i64 %tmp12, 3 ; <i64> [#uses=1] + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph8, %bb3 + %8 = phi i64 [ 0, %bb.nph8 ], [ %5, %bb3 ] ; <i64> [#uses=4] + %tmp10 = mul i64 %8, 129 ; <i64> [#uses=1] + %tmp9 = sub i64 %7, %8 ; <i64> [#uses=1] + %9 = icmp sgt i64 %7, %8 ; <i1> [#uses=1] + br i1 %9, label %bb1, label %bb3 + +return: ; preds = %bb3, %entry + ret void +} + +; CHECK: entry => <Function Return> Parameters: (%N, %M, ) diff --git 
a/polly/test/ScopInfo/loop_affine_bound_2.ll b/polly/test/ScopInfo/loop_affine_bound_2.ll new file mode 100644 index 00000000000..69560b6b335 --- /dev/null +++ b/polly/test/ScopInfo/loop_affine_bound_2.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < (4*N + 7*M +3); ++j) +; for (i = (7*j + 6*M -9); i < (3*j + 5*N + 2) ; ++i) +; a[i][j] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = shl i64 %N, 2 ; <i64> [#uses=2] + %1 = mul i64 %M, 7 ; <i64> [#uses=2] + %2 = or i64 %0, 3 ; <i64> [#uses=1] + %3 = add nsw i64 %2, %1 ; <i64> [#uses=1] + %4 = icmp sgt i64 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %bb.nph8, label %return + +bb.nph8: ; preds = %entry + %tmp14 = mul i64 %M, 6 ; <i64> [#uses=1] + %tmp15 = add i64 %tmp14, -9 ; <i64> [#uses=2] + %tmp20 = add i64 %1, %0 ; <i64> [#uses=1] + %tmp21 = add i64 %tmp20, 3 ; <i64> [#uses=1] + %tmp25 = mul i64 %M, -6 ; <i64> [#uses=1] + %tmp26 = mul i64 %N, 5 ; <i64> [#uses=2] + %tmp27 = add i64 %tmp25, %tmp26 ; <i64> [#uses=1] + %tmp28 = add i64 %tmp27, 11 ; <i64> [#uses=1] + %tmp35 = add i64 %tmp26, 2 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb3, %bb.nph8 + %j.07 = phi i64 [ 0, %bb.nph8 ], [ %6, %bb3 ] ; <i64> [#uses=5] + %tmp17 = mul i64 %j.07, 897 ; <i64> [#uses=1] + %tmp24 = mul i64 %j.07, -4 ; <i64> [#uses=1] + %tmp13 = add i64 %tmp24, %tmp28 ; <i64> [#uses=1] + %tmp30 = mul i64 %j.07, 7 ; <i64> [#uses=1] + %tmp33 = add i64 %tmp30, %tmp15 ; <i64> [#uses=1] + %tmp34 = mul i64 %j.07, 3 ; <i64> [#uses=1] + %tmp36 = add i64 %tmp34, %tmp35 ; <i64> [#uses=1] + %5 = 
icmp sgt i64 %tmp36, %tmp33 ; <i1> [#uses=1] + br i1 %5, label %bb1, label %bb3 + +bb1: ; preds = %bb1, %bb + %indvar = phi i64 [ 0, %bb ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2] + %tmp16 = add i64 %indvar, %tmp15 ; <i64> [#uses=1] + %scevgep = getelementptr [128 x i64]* %a, i64 %tmp16, i64 %tmp17 ; <i64*> [#uses=1] + store i64 0, i64* %scevgep, align 8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp13 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1, %bb + %6 = add nsw i64 %j.07, 1 ; <i64> [#uses=2] + %exitcond22 = icmp eq i64 %6, %tmp21 ; <i1> [#uses=1] + br i1 %exitcond22, label %return, label %bb + +return: ; preds = %bb3, %entry + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%M, %N, ), Max Loop Depth: 2 diff --git a/polly/test/ScopInfo/loop_carry.ll b/polly/test/ScopInfo/loop_carry.ll new file mode 100644 index 00000000000..3f78493d3f7 --- /dev/null +++ b/polly/test/ScopInfo/loop_carry.ll @@ -0,0 +1,92 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-scops -analyze %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +;long f(long a[], long n) { +; long i, k; +; k = 1; +; for (i = 1; i < n; ++i) { +; a[i] = k * a[i - 1]; +; k = a[i + 3] + a[2 * i]; +; } +; return 0; +;} + +define i64 @f(i64* nocapture %a, i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + %tmp = add i64 %n, -1 ; <i64> [#uses=1] + %.pre = load i64* %a, align 8 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %1 = phi i64 [ %.pre, %bb.nph ], [ %2, %bb ] ; <i64> [#uses=1] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; <i64> [#uses=3] + %k.05 = phi i64 [ 1, %bb.nph ], [ %5, %bb ] ; <i64> [#uses=1] + 
%tmp6 = add i64 %indvar, 1 ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %tmp6 ; <i64*> [#uses=1] + %2 = mul nsw i64 %1, %k.05 ; <i64> [#uses=2] + store i64 %2, i64* %scevgep, align 8 + %tmp7 = shl i64 %indvar, 1 ; <i64> [#uses=1] + %tmp11 = add i64 %indvar, 4 ; <i64> [#uses=1] + %tmp8 = add i64 %tmp7, 2 ; <i64> [#uses=1] + %scevgep12 = getelementptr i64* %a, i64 %tmp11 ; <i64*> [#uses=1] + %scevgep9 = getelementptr i64* %a, i64 %tmp8 ; <i64*> [#uses=1] + %3 = load i64* %scevgep9, align 8 ; <i64> [#uses=1] + %4 = load i64* %scevgep12, align 8 ; <i64> [#uses=1] + %5 = add nsw i64 %3, %4 ; <i64> [#uses=1] + %exitcond = icmp eq i64 %tmp6, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb, %entry + ret i64 0 +} + +; CHECK: Context: +; CHECK: [p0] -> { [] } +; CHECK: Statements { +; CHECK: Stmt_bb_nph +; CHECK: Domain := +; CHECK: [p0] -> { Stmt_bb_nph[] : p0 >= 2 }; +; CHECK: Scattering := +; CHECK: [p0] -> { Stmt_bb_nph[] -> scattering[0, 0, 0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { Stmt_bb_nph[] -> MemRef_a[0] }; +; CHECK: WriteAccess := +; CHECK: [p0] -> { Stmt_bb_nph[] -> MemRef_k_05_reg2mem[0] }; +; CHECK: WriteAccess := +; CHECK: [p0] -> { Stmt_bb_nph[] -> MemRef__reg2mem[0] }; +; CHECK: Stmt_bb +; CHECK: Domain := +; CHECK: [p0] -> { Stmt_bb[i0] : i0 >= 0 and i0 <= -2 + p0 and p0 >= 2 }; +; CHECK: Scattering := +; CHECK: [p0] -> { Stmt_bb[i0] -> scattering[1, i0, 0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef__reg2mem[0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef_k_05_reg2mem[0] }; +; CHECK: WriteAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef_a[1 + i0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef_a[2 + 2i0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef_a[4 + i0] }; +; CHECK: WriteAccess := +; CHECK: [p0] -> { Stmt_bb[i0] -> MemRef_k_05_reg2mem[0] }; +; CHECK: WriteAccess := +; CHECK: [p0] -> { Stmt_bb[i0] 
-> MemRef__reg2mem[0] }; +; CHECK: FinalRead +; CHECK: Domain := +; CHECK: [p0] -> { FinalRead[0] }; +; CHECK: Scattering := +; CHECK: [p0] -> { FinalRead[i0] -> scattering[200000000, o1, o2] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { FinalRead[i0] -> MemRef_a[o0] }; +; CHECK: ReadAccess := +; CHECK: [p0] -> { FinalRead[i0] -> MemRef_k_05_reg2mem[o0] }; +; CHECK: ReadAccess := +; CHECK: -> { FinalRead[i0] -> MemRef__reg2mem[o0] }; diff --git a/polly/test/ScopInfo/loop_complex_parameter.ll b/polly/test/ScopInfo/loop_complex_parameter.ll new file mode 100644 index 00000000000..c10d4103616 --- /dev/null +++ b/polly/test/ScopInfo/loop_complex_parameter.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * + +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < (4*N + 7*M +3); ++j) +; for (i = 0; i < (5*N*M + 2); ++i) +; ... +;} + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = shl i64 %N, 2 ; <i64> [#uses=2] + %1 = mul i64 %M, 7 ; <i64> [#uses=2] + %2 = or i64 %0, 3 ; <i64> [#uses=1] + %3 = add nsw i64 %2, %1 ; <i64> [#uses=1] + %4 = icmp sgt i64 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %bb.nph8, label %return + +bb1: ; preds = %bb2.preheader, %bb1 + %i.06 = phi i64 [ 0, %bb2.preheader ], [ %5, %bb1 ] ; <i64> [#uses=2] + %scevgep = getelementptr [128 x i64]* %a, i64 %i.06, i64 %11 ; <i64*> [#uses=1] + store i64 0, i64* %scevgep, align 8 + %5 = add nsw i64 %i.06, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %5, %tmp10 ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %6 = add i64 %11, 1 ; <i64> [#uses=2] + %exitcond15 = icmp eq i64 %6, %tmp14 ; <i1> [#uses=1] + br i1 %exitcond15, label %return, label %bb2.preheader + +bb.nph8: ; preds = %entry + %7 = mul i64 %N, 5 ; <i64> [#uses=1] + %8 = mul i64 %7, %M ; <i64> [#uses=1] + %9 = add nsw 
i64 %8, 2 ; <i64> [#uses=1] + %10 = icmp sgt i64 %9, 0 ; <i1> [#uses=1] + br i1 %10, label %bb.nph8.split, label %return + +bb.nph8.split: ; preds = %bb.nph8 + %tmp = mul i64 %M, %N ; <i64> [#uses=1] + %tmp9 = mul i64 %tmp, 5 ; <i64> [#uses=1] + %tmp10 = add i64 %tmp9, 2 ; <i64> [#uses=1] + %tmp13 = add i64 %1, %0 ; <i64> [#uses=1] + %tmp14 = add i64 %tmp13, 3 ; <i64> [#uses=1] + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph8.split, %bb3 + %11 = phi i64 [ 0, %bb.nph8.split ], [ %6, %bb3 ] ; <i64> [#uses=2] + br label %bb1 + +return: ; preds = %bb.nph8, %bb3, %entry + ret void +} + +; CHECK: TO BE WRITTEN diff --git a/polly/test/ScopInfo/loop_depth_0.ll b/polly/test/ScopInfo/loop_depth_0.ll new file mode 100644 index 00000000000..f846e7d032d --- /dev/null +++ b/polly/test/ScopInfo/loop_depth_0.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(long a[][128], long N, long M) { +; long i, j, k; +; for (j = 0; j < M; ++j) +; for (i = 0; i < N; ++i) +; a[i][j] = 4*i+5; + +; for (k = 0; k < 64; ++k) +; a[4][k] = a[k][4]; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %M, 0 ; <i1> [#uses=1] + %1 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + %or.cond = and i1 %0, %1 ; <i1> [#uses=1] + br i1 %or.cond, label %bb2.preheader, label %bb6 + +bb1: ; preds = %bb2.preheader, %bb1 + %i.010 = phi i64 [ 0, %bb2.preheader ], [ %2, %bb1 ] ; <i64> [#uses=3] + %tmp = shl i64 %i.010, 2 ; <i64> [#uses=1] + %tmp16 = add i64 %tmp, 5 ; <i64> [#uses=1] + %scevgep17 = getelementptr [128 x i64]* %a, i64 %i.010, i64 %4 ; <i64*> [#uses=1] + store i64 %tmp16, i64* %scevgep17, align 8 + %2 = add nsw i64 %i.010, 1 ; <i64> [#uses=2] + %exitcond15 = icmp eq i64 %2, %N ; <i1> 
[#uses=1] + br i1 %exitcond15, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %3 = add i64 %4, 1 ; <i64> [#uses=2] + %exitcond18 = icmp eq i64 %3, %M ; <i1> [#uses=1] + br i1 %exitcond18, label %bb6, label %bb2.preheader + +bb2.preheader: ; preds = %bb3, %entry + %4 = phi i64 [ %3, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2] + br label %bb1 + +bb6: ; preds = %bb6, %bb3, %entry + %k.09 = phi i64 [ %6, %bb6 ], [ 0, %bb3 ], [ 0, %entry ] ; <i64> [#uses=3] + %scevgep = getelementptr [128 x i64]* %a, i64 4, i64 %k.09 ; <i64*> [#uses=1] + %scevgep14 = getelementptr [128 x i64]* %a, i64 %k.09, i64 4 ; <i64*> [#uses=1] + %5 = load i64* %scevgep14, align 8 ; <i64> [#uses=1] + store i64 %5, i64* %scevgep, align 8 + %6 = add nsw i64 %k.09, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %6, 64 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb6 + +return: ; preds = %bb6 + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, %M, ), Max Loop Depth: 2 diff --git a/polly/test/ScopInfo/loop_multi_exits.ll b/polly/test/ScopInfo/loop_multi_exits.ll new file mode 100644 index 00000000000..0898ca5468d --- /dev/null +++ b/polly/test/ScopInfo/loop_multi_exits.ll @@ -0,0 +1,101 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s -check-prefix=INDVAR +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;From pollybench. 
+;void f(long A[][128], long n) { +; long k, i, j; +; for (k = 0; k < n; k++) { +; for (j = k + 1; j < n; j++) +; A[k][j] = A[k][j] / A[k][k]; +; for(i = k + 1; i < n; i++) +; for (j = k + 1; j < n; j++) +; A[i][j] = A[i][j] - A[i][k] * A[k][j]; +; } +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define void @f([128 x i64]* nocapture %A, i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 0 ; <i1> [#uses=1] + br i1 %0, label %bb.nph30, label %return + +bb.nph: ; preds = %bb2.preheader + %1 = getelementptr inbounds [128 x i64]* %A, i64 %k.023, i64 %k.023 ; <i64*> [#uses=1] + %tmp31 = sub i64 %tmp, %k.023 ; <i64> [#uses=1] + %tmp32 = mul i64 %k.023, 129 ; <i64> [#uses=1] + %tmp33 = add i64 %tmp32, 1 ; <i64> [#uses=1] + br label %bb1 + +bb1: ; preds = %bb1, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2] + %tmp34 = add i64 %tmp33, %indvar ; <i64> [#uses=1] + %scevgep = getelementptr [128 x i64]* %A, i64 0, i64 %tmp34 ; <i64*> [#uses=2] + %2 = load i64* %scevgep, align 8 ; <i64> [#uses=1] + %3 = load i64* %1, align 8 ; <i64> [#uses=1] + %4 = sdiv i64 %2, %3 ; <i64> [#uses=1] + store i64 %4, i64* %scevgep, align 8 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %tmp31 ; <i1> [#uses=1] + br i1 %exitcond, label %bb8.loopexit, label %bb1 + +bb.nph16: ; preds = %bb.nph22, %bb8.loopexit12 + %indvar39 = phi i64 [ 0, %bb.nph22 ], [ %tmp51, %bb8.loopexit12 ] ; <i64> [#uses=2] + %tmp48 = add i64 %j.013, %indvar39 ; <i64> [#uses=1] + %tmp51 = add i64 %indvar39, 1 ; <i64> [#uses=3] + %scevgep53 = getelementptr [128 x i64]* %A, i64 %tmp51, i64 %tmp52 ; <i64*> [#uses=1] + %tmp37 = sub i64 %n, %j.013 ; <i64> [#uses=1] + br label %bb5 + +bb5: ; preds = %bb5, %bb.nph16 + %indvar35 = phi i64 [ 0, %bb.nph16 ], [ %indvar.next36, %bb5 ] ; <i64> 
[#uses=2] + %tmp49 = add i64 %j.013, %indvar35 ; <i64> [#uses=2] + %scevgep43 = getelementptr [128 x i64]* %A, i64 %tmp48, i64 %tmp49 ; <i64*> [#uses=2] + %scevgep44 = getelementptr [128 x i64]* %A, i64 %k.023, i64 %tmp49 ; <i64*> [#uses=1] + %5 = load i64* %scevgep43, align 8 ; <i64> [#uses=1] + %6 = load i64* %scevgep53, align 8 ; <i64> [#uses=1] + %7 = load i64* %scevgep44, align 8 ; <i64> [#uses=1] + %8 = mul nsw i64 %7, %6 ; <i64> [#uses=1] + %9 = sub nsw i64 %5, %8 ; <i64> [#uses=1] + store i64 %9, i64* %scevgep43, align 8 + %indvar.next36 = add i64 %indvar35, 1 ; <i64> [#uses=2] + %exitcond38 = icmp eq i64 %indvar.next36, %tmp37 ; <i1> [#uses=1] + br i1 %exitcond38, label %bb8.loopexit12, label %bb5 + +bb8.loopexit: ; preds = %bb1 + br i1 %10, label %bb.nph22, label %return + +bb8.loopexit12: ; preds = %bb5 + %exitcond47 = icmp eq i64 %tmp51, %tmp46 ; <i1> [#uses=1] + br i1 %exitcond47, label %bb10.loopexit, label %bb.nph16 + +bb.nph22: ; preds = %bb8.loopexit + %tmp46 = sub i64 %tmp, %k.023 ; <i64> [#uses=1] + %tmp52 = mul i64 %k.023, 129 ; <i64> [#uses=1] + br label %bb.nph16 + +bb10.loopexit: ; preds = %bb8.loopexit12 + br i1 %10, label %bb2.preheader, label %return + +bb.nph30: ; preds = %entry + %tmp = add i64 %n, -1 ; <i64> [#uses=2] + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph30, %bb10.loopexit + %k.023 = phi i64 [ 0, %bb.nph30 ], [ %j.013, %bb10.loopexit ] ; <i64> [#uses=8] + %j.013 = add i64 %k.023, 1 ; <i64> [#uses=5] + %10 = icmp slt i64 %j.013, %n ; <i1> [#uses=3] + br i1 %10, label %bb.nph, label %return + +return: ; preds = %bb2.preheader, %bb10.loopexit, %bb8.loopexit, %entry + ret void +} + +; CHECK: Scop: bb5 => bb8.loopexit12 Parameters: ({0,+,1}<%bb2.preheader>, %n, {0,+,1}<%bb.nph16>, ), Max Loop Depth: 1 +; CHECK: Scop: bb.nph16 => bb10.loopexit Parameters: ({0,+,1}<%bb2.preheader>, %n, ), Max Loop Depth: 2 +; CHECK: Scop: bb1 => bb8.loopexit Parameters: ({0,+,1}<%bb2.preheader>, %n, ), Max Loop Depth: 1 + +; INDVAR: 
Scop: bb1 => bb8.loopexit Parameters: ({0,+,1}<%bb2.preheader>, %n, ), Max Loop Depth: 1 +; INDVAR: Scop: bb.nph16 => bb10.loopexit Parameters: ({0,+,1}<%bb2.preheader>, %n, ), Max Loop Depth: 2 +; INDVAR: Scop: bb5 => bb8.loopexit12 Parameters: ({0,+,1}<%bb2.preheader>, %n, {0,+,1}<%bb.nph16>, ), Max Loop Depth: 1 diff --git a/polly/test/ScopInfo/nest_loop_0.ll b/polly/test/ScopInfo/nest_loop_0.ll new file mode 100644 index 00000000000..1a6ce2cbcef --- /dev/null +++ b/polly/test/ScopInfo/nest_loop_0.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s + +;void f(long a[][128], long N, long M) { +; long i, j; +; for (j = 0; j < M; ++j) +; for (i = 0; i < N; ++i) +; ... +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f([128 x i64]* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %M, 0 ; <i1> [#uses=1] + %1 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + %or.cond = and i1 %0, %1 ; <i1> [#uses=1] + br i1 %or.cond, label %bb2.preheader, label %return + +bb1: ; preds = %bb2.preheader, %bb1 + %i.06 = phi i64 [ 0, %bb2.preheader ], [ %2, %bb1 ] ; <i64> [#uses=3] + %scevgep = getelementptr [128 x i64]* %a, i64 %i.06, i64 %4 ; <i64*> [#uses=1] + %tmp = add i64 %i.06, %N ; <i64> [#uses=1] + store i64 %tmp, i64* %scevgep, align 8 + %2 = add nsw i64 %i.06, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %3 = add i64 %4, 1 ; <i64> [#uses=2] + %exitcond9 = icmp eq i64 %3, %M ; <i1> [#uses=1] + br i1 %exitcond9, label %return, label %bb2.preheader + +bb2.preheader: ; preds = %bb3, %entry + %4 = phi i64 [ %3, %bb3 ], [ 0, %entry ] ; <i64> [#uses=2] + br label %bb1 + +return: ; preds = %bb3, %entry + ret void +} + +; CHECK: Scop: bb2.preheader => return.single_exit 
Parameters: (%M, %N, ), Max Loop Depth: 2 diff --git a/polly/test/ScopInfo/out_of_loop_0.ll b/polly/test/ScopInfo/out_of_loop_0.ll new file mode 100644 index 00000000000..776f96a1eac --- /dev/null +++ b/polly/test/ScopInfo/out_of_loop_0.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(long a[], long N, long M) { +; long i; + +; for (i = 0; i < N; ++i) +; a[i] = i; + +; a[M] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N, i64 %M) nounwind { +entry: + %0 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %bb2 + +bb: ; preds = %bb, %entry + %1 = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=1] + store i64 %1, i64* %scevgep, align 8 + %2 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb, %entry + %3 = getelementptr inbounds i64* %a, i64 %M ; <i64*> [#uses=1] + store i64 0, i64* %3, align 8 + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, %M, ) diff --git a/polly/test/ScopInfo/phi_not_grouped_at_top.ll b/polly/test/ScopInfo/phi_not_grouped_at_top.ll new file mode 100755 index 00000000000..648a36a24e8 --- /dev/null +++ b/polly/test/ScopInfo/phi_not_grouped_at_top.ll @@ -0,0 +1,27 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -analyze %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +declare i32 @funa() 
align 2 + +define void @funb() align 2 { +entry: + br label %bb117 + +bb117: ; preds = %bb56 + %0 = invoke i32 @funa() + to label %bb121 unwind label %invcont118 ; <%struct.btHullTriangle*> [#uses=1] + +invcont118: ; preds = %bb117 + br label %bb121 + +bb121: ; preds = %bb120, %invcont118 + %iftmp.82.0 = phi i32 [ 0, %bb117 ], [ 1, %invcont118 ] ; <i8> [#uses=1] + %te.1 = phi i32 [ undef, %invcont118 ], [ %0, %bb117 ] ; + %cnd = icmp ne i32 %iftmp.82.0, %te.1 ; <i1> [#uses=1] + br label %return + +return: ; preds = %entry + ret void +} diff --git a/polly/test/ScopInfo/phi_with_invoke_edge.ll b/polly/test/ScopInfo/phi_with_invoke_edge.ll new file mode 100755 index 00000000000..cb77a004a73 --- /dev/null +++ b/polly/test/ScopInfo/phi_with_invoke_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-detect -analyze %s +; ModuleID = '/tmp/invoke_edge_not_supported.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + + +define i16 @v() { +entry: + br i1 undef, label %bb16, label %invcont12 + +invcont12: ; preds = %invcont11 + %a = invoke i16 @v() to label %return unwind label %lpad22 ; <i16*> [#uses=1] + +bb16: ; preds = %bb7 + br i1 undef, label %bb9, label %return + +return: ; preds = %bb16, %invcont12 + %b = phi i16 [ %a, %invcont12 ], [ 0, %bb16 ] ; <i16*> [#uses=1] + ret i16 %b + +bb9: ; preds = %bb3 + ret i16 0 + +lpad22: ; preds = %invcont12 + unreachable +} diff --git a/polly/test/ScopInfo/simple_loop_0.ll b/polly/test/ScopInfo/simple_loop_0.ll new file mode 100644 index 00000000000..40900ca5eae --- /dev/null +++ b/polly/test/ScopInfo/simple_loop_0.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s -check-prefix=WITHAF +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s + +;void f(long a[], long N) { +; 
long i; +; for (i = 0; i < 128; ++i) +; a[i] = a[i] - a[i + 2]; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %i.03 ; <i64*> [#uses=2] + %tmp = add i64 %i.03, 2 ; <i64> [#uses=1] + %scevgep4 = getelementptr i64* %a, i64 %tmp ; <i64*> [#uses=1] + %0 = load i64* %scevgep, align 8 ; <i64> [#uses=1] + %1 = load i64* %scevgep4, align 8 ; <i64> [#uses=1] + %2 = sub i64 %0, %1 ; <i64> [#uses=1] + store i64 %2, i64* %scevgep, align 8 + %3 = add nsw i64 %i.03, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %3, 128 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} + +; CHECK: Scop: bb => return Parameters: () +; WITHAF: Scop: bb => return Parameters: (), Max Loop Depth: 1 +; WITHAF: Bounds of Loop: bb: { 127 } +; WITHAF: BB: bb{ +; WITHAF: Reads %a[8 * {0,+,1}<nuw><nsw><%bb> + 0] +; WITHAF: Reads %a[8 * {0,+,1}<nuw><nsw><%bb> + 16] +; WITHAF: Writes %a[8 * {0,+,1}<nuw><nsw><%bb> + 0] +; WITHAF: } diff --git a/polly/test/ScopInfo/simple_loop_1.ll b/polly/test/ScopInfo/simple_loop_1.ll new file mode 100644 index 00000000000..f718d1b3693 --- /dev/null +++ b/polly/test/ScopInfo/simple_loop_1.ll @@ -0,0 +1,30 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s +; XFAIL: * +;void f(int a[], int N) { +; int i; +; for (i = 0; i < N; ++i) +; ... 
+;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + %0 = icmp sgt i64 %N, 0 ; <i1> [#uses=1] + br i1 %0, label %bb, label %return + +bb: ; preds = %bb, %entry + %1 = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %1 ; <i64*> [#uses=1] + store i64 %1, i64* %scevgep, align 8 + %2 = add nsw i64 %1, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %2, %N ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +; CHECK: Scop: entry => <Function Return> Parameters: (%N, ) diff --git a/polly/test/ScopInfo/static_known_0.ll b/polly/test/ScopInfo/static_known_0.ll new file mode 100644 index 00000000000..0435d5094bf --- /dev/null +++ b/polly/test/ScopInfo/static_known_0.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze %s | FileCheck %s + + +;void f(long a[], long N) { +; long M = rnd(); +; long i; + +; for (i = 0; i < M; ++i) +; a[i] = i; + +; a[N] = 0; +;} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* nocapture %a, i64 %N) nounwind { +entry: + %0 = tail call i64 (...)* @rnd() nounwind ; <i64> [#uses=2] + %1 = icmp sgt i64 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %bb, label %bb2 + +bb: ; preds = %bb, %entry + %2 = phi i64 [ 0, %entry ], [ %3, %bb ] ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %2 ; <i64*> [#uses=1] + store i64 %2, i64* %scevgep, align 8 + %3 = add nsw i64 %2, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %3, %0 ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb, %entry + %4 = getelementptr inbounds i64* 
%a, i64 %N ; <i64*> [#uses=1] + store i64 0, i64* %4, align 8 + ret void +} + +declare i64 @rnd(...) + +; CHECK: Scop: bb => bb2.single_exit Parameters: (%0, ), Max Loop Depth: 1 diff --git a/polly/test/ScopInfo/sum.ll b/polly/test/ScopInfo/sum.ll new file mode 100644 index 00000000000..0afd5adb7e4 --- /dev/null +++ b/polly/test/ScopInfo/sum.ll @@ -0,0 +1,49 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-analyze-ir -analyze %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +;long f(long a[], long n) { +; long i, k; +; k = 0; +; for (i = 1; i < n; ++i) { +; k += a[i]; +; } +; return k; +;} + + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-linux-gnu" + +define i64 @f(i64* nocapture %a, i64 %n) nounwind readonly { +entry: + %0 = icmp sgt i64 %n, 1 ; <i1> [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + %tmp = add i64 %n, -1 ; <i64> [#uses=1] + br label %bb + +bb: ; preds = %bb, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] ; <i64> [#uses=1] + %k.05 = phi i64 [ 0, %bb.nph ], [ %2, %bb ] ; <i64> [#uses=1] + %tmp6 = add i64 %indvar, 1 ; <i64> [#uses=3] + %scevgep = getelementptr i64* %a, i64 %tmp6 ; <i64*> [#uses=1] + %1 = load i64* %scevgep, align 8 ; <i64> [#uses=1] + %2 = add nsw i64 %1, %k.05 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %tmp6, %tmp ; <i1> [#uses=1] + br i1 %exitcond, label %bb2, label %bb + +bb2: ; preds = %bb, %entry + %k.0.lcssa = phi i64 [ 0, %entry ], [ %2, %bb ] ; <i64> [#uses=1] + ret i64 %k.0.lcssa +} + +; CHECK: Bounds of Loop: bb: { 1 * %n + -2 } +; CHECK: BB: bb{ +; CHECK: Reads %k.05.reg2mem[0] Refs: Must alias {%k.05.reg2mem, } May alias {}, +; CHECK: Reads %a[8 * 
{0,+,1}<%bb> + 8] Refs: Must alias {%a, } May alias {}, +; CHECK: Writes %k.0.lcssa.reg2mem[0] Refs: Must alias {%k.0.lcssa.reg2mem, } May alias {}, +; CHECK: Writes %k.05.reg2mem[0] Refs: Must alias {%k.05.reg2mem, } May alias {}, +; CHECK: } diff --git a/polly/test/ScopInfo/undef_in_cond.ll b/polly/test/ScopInfo/undef_in_cond.ll new file mode 100644 index 00000000000..1b021abaab4 --- /dev/null +++ b/polly/test/ScopInfo/undef_in_cond.ll @@ -0,0 +1,24 @@ +; RUN: opt %loadPolly %defaultOpts -polly-scops %s -analyze %s | not FileCheck %s +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +define fastcc void @fix_operands() nounwind { +entry: + br i1 undef, label %bb3, label %bb1 + +bb1: ; preds = %bb + %0 = icmp eq i32 0, undef ; <i1> [#uses=1] + br i1 %0, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %bb + br label %bb14 + +bb14: ; preds = %bb5, %bb4, %bb3, %entry + ret void +} + +; CHECK: Scop! 
diff --git a/polly/test/TempScop/not-a-reduction.c b/polly/test/TempScop/not-a-reduction.c new file mode 100644 index 00000000000..7acddb4c2cc --- /dev/null +++ b/polly/test/TempScop/not-a-reduction.c @@ -0,0 +1,13 @@ +#define TYPE float +#define NUM 4 + +TYPE A[NUM]; +TYPE B[NUM]; +TYPE C[NUM]; + +void vector_multiply(void) { + int i; + for (i = 0; i < NUM; i++) { + A[i] = B[i] * C[i]; + } +} diff --git a/polly/test/TempScop/not-a-reduction.ll b/polly/test/TempScop/not-a-reduction.ll new file mode 100644 index 00000000000..93a12844a4f --- /dev/null +++ b/polly/test/TempScop/not-a-reduction.ll @@ -0,0 +1,37 @@ +; RUN: opt -polly-analyze-ir -analyze < %s 2>&1 | not FileCheck %s +; ModuleID = 'not-a-reduction.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@B = common global [4 x float] zeroinitializer, align 16 +@C = common global [4 x float] zeroinitializer, align 16 +@A = common global [4 x float] zeroinitializer, align 16 + +define void @vector_multiply() nounwind { +bb: + br label %bb3 + +bb3: ; preds = %bb7, %bb + %indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb ] + %scevgep = getelementptr [4 x float]* @A, i64 0, i64 %indvar + %scevgep1 = getelementptr [4 x float]* @C, i64 0, i64 %indvar + %scevgep2 = getelementptr [4 x float]* @B, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 4 + br i1 %exitcond, label %bb4, label %bb8 + +bb4: ; preds = %bb3 + %tmp = load float* %scevgep2, align 4 + %tmp5 = load float* %scevgep1, align 4 + %tmp6 = fmul float %tmp, %tmp5 + store float %tmp6, float* %scevgep, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvar.next = add i64 %indvar, 1 + br label %bb3 + +bb8: ; preds = %bb3 + ret void +} + +; CHECK: Reduction diff --git a/polly/test/TempScop/reduction-add.c b/polly/test/TempScop/reduction-add.c new file mode 100644 index 00000000000..e4fb284425c --- 
/dev/null +++ b/polly/test/TempScop/reduction-add.c @@ -0,0 +1,12 @@ +#define NUM 128 + +int A[NUM]; +int R; + +int reduction(void) { + int i; + for (i = 0; i < NUM; i++) { + R += A[i]; + } + return R; +} diff --git a/polly/test/TempScop/reduction-add.ll b/polly/test/TempScop/reduction-add.ll new file mode 100644 index 00000000000..e5730e609d2 --- /dev/null +++ b/polly/test/TempScop/reduction-add.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze < %s 2>&1 | FileCheck %s +; ModuleID = 'reduction-add.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [128 x i32] zeroinitializer, align 16 +@R = common global i32 0, align 4 + +define i32 @reduction() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb + %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ] + %scevgep = getelementptr [128 x i32]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 128 + br i1 %exitcond, label %bb2, label %bb6 + +bb2: ; preds = %bb1 + %tmp = load i32* %scevgep, align 4 + %tmp3 = load i32* @R, align 4 + %tmp4 = add nsw i32 %tmp3, %tmp + store i32 %tmp4, i32* @R, align 4 + br label %bb5 + +bb5: ; preds = %bb2 + %indvar.next = add i64 %indvar, 1 + br label %bb1 + +bb6: ; preds = %bb1 + %tmp7 = load i32* @R, align 4 + ret i32 %tmp7 +} + +; CHECK: Reduction diff --git a/polly/test/TempScop/reduction-sub.c b/polly/test/TempScop/reduction-sub.c new file mode 100644 index 00000000000..e4eba02345f --- /dev/null +++ b/polly/test/TempScop/reduction-sub.c @@ -0,0 +1,12 @@ +#define NUM 128 + +int A[NUM]; +int R; + +int reduction(void) { + int i; + for (i = 0; i < NUM; i++) { + R -= A[i]; + } + return R; +} diff --git a/polly/test/TempScop/reduction-sub.ll b/polly/test/TempScop/reduction-sub.ll new file mode 100644 index 00000000000..ef306b815ae --- /dev/null +++ 
b/polly/test/TempScop/reduction-sub.ll @@ -0,0 +1,35 @@ +; RUN: opt -polly-analyze-ir -analyze < %s 2>&1 | not FileCheck %s +; ModuleID = 'reduction-sub.s' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [128 x i32] zeroinitializer, align 16 +@R = common global i32 0, align 4 + +define i32 @reduction() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb + %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ] + %scevgep = getelementptr [128 x i32]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 128 + br i1 %exitcond, label %bb2, label %bb6 + +bb2: ; preds = %bb1 + %tmp = load i32* %scevgep, align 4 + %tmp3 = load i32* @R, align 4 + %tmp4 = sub nsw i32 %tmp3, %tmp + store i32 %tmp4, i32* @R, align 4 + br label %bb5 + +bb5: ; preds = %bb2 + %indvar.next = add i64 %indvar, 1 + br label %bb1 + +bb6: ; preds = %bb1 + %tmp7 = load i32* @R, align 4 + ret i32 %tmp7 +} + +; CHECK: Reduction diff --git a/polly/test/TempScop/reduction-with-added-immediate.c b/polly/test/TempScop/reduction-with-added-immediate.c new file mode 100644 index 00000000000..985f84b6bbf --- /dev/null +++ b/polly/test/TempScop/reduction-with-added-immediate.c @@ -0,0 +1,12 @@ +#define NUM 128 + +int A[NUM]; +int R; + +int not_a_reduction(void) { + int i; + for (i = 0; i < NUM; i++) { + R += 1 + A[i]; + } + return R; +} diff --git a/polly/test/TempScop/reduction-with-added-immediate.ll b/polly/test/TempScop/reduction-with-added-immediate.ll new file mode 100644 index 00000000000..a8c652a2b75 --- /dev/null +++ b/polly/test/TempScop/reduction-with-added-immediate.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -analyze < %s 2>&1 | FileCheck %s +; ModuleID = 'reduction-with-added-immediate.s' +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@A = common global [128 x i32] zeroinitializer, align 16 +@R = common global i32 0, align 4 + +define i32 @not_a_reduction() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ] + %scevgep = getelementptr [128 x i32]* @A, i64 0, i64 %indvar + %exitcond = icmp ne i64 %indvar, 128 + br i1 %exitcond, label %bb2, label %bb7 + +bb2: ; preds = %bb1 + %tmp = load i32* %scevgep, align 4 + %tmp3 = add nsw i32 %tmp, 1 + %tmp4 = load i32* @R, align 4 + %tmp5 = add nsw i32 %tmp4, %tmp3 + store i32 %tmp5, i32* @R, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %indvar.next = add i64 %indvar, 1 + br label %bb1 + +bb7: ; preds = %bb1 + %tmp8 = load i32* @R, align 4 + ret i32 %tmp8 +} + +; CHECK: Reduction diff --git a/polly/test/create_ll.sh b/polly/test/create_ll.sh new file mode 100755 index 00000000000..d4fbf4496fd --- /dev/null +++ b/polly/test/create_ll.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +clang -S -emit-llvm -O0 $1 + +SFILE=`echo $1 | sed -e 's/\.c/.s/g'` +LLFILE=`echo $1 | sed -e 's/\.c/.ll/g'` + +opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars \ +-instnamer ${SFILE} -S > ${LLFILE} + +rm ${SFILE} diff --git a/polly/test/lit.cfg b/polly/test/lit.cfg new file mode 100644 index 00000000000..1856ebe26f3 --- /dev/null +++ b/polly/test/lit.cfg @@ -0,0 +1,97 @@ +# -*clang- Python -*- + +import os +import platform + +# Configuration file for the 'lit' test runner. + +# name: The name of this test suite. +config.name = 'Polly' + +# testFormat: The test format to use to interpret tests. +# +# For now we require '&&' between commands, until they get globally killed and +# the test runner updated. 
+execute_external = platform.system() != 'Windows' +config.test_format = lit.formats.ShTest(execute_external) + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.ll'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root path where tests should be run. +polly_obj_root = getattr(config, 'polly_obj_root', None) +if polly_obj_root is not None: + config.test_exec_root = os.path.join(polly_obj_root, 'test') + +# Set llvm_{src,obj}_root for use by others. +config.llvm_src_root = getattr(config, 'llvm_src_root', None) +config.llvm_obj_root = getattr(config, 'llvm_obj_root', None) + +# Tweak the PATH to include the tools dir and the scripts dir. +if polly_obj_root is not None: + llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) + if not llvm_tools_dir: + lit.fatal('No LLVM tools dir set!') + path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) + config.environment['PATH'] = path + + llvm_libs_dir = getattr(config, 'llvm_libs_dir', None) + if not llvm_libs_dir: + lit.fatal('No LLVM libs dir set!') + path = os.path.pathsep.join((llvm_libs_dir, + config.environment.get('LD_LIBRARY_PATH',''))) + config.environment['LD_LIBRARY_PATH'] = path + +### + +# Check that the object root is known. +if config.test_exec_root is None: + # Otherwise, we haven't loaded the site specific configuration (the user is + # probably trying to run on a test file directly, and either the site + # configuration hasn't been created by the build system, or we are in an + # out-of-tree build situation). + + # Check for 'polly_site_config' user parameter, and use that if available. + site_cfg = lit.params.get('polly_site_config', None) + if site_cfg and os.path.exists(site_cfg): + lit.load_config(config, site_cfg) + raise SystemExit + + # Try to detect the situation where we are using an out-of-tree build by + # looking for 'llvm-config'. 
+ # + # FIXME: I debated (i.e., wrote and threw away) adding logic to + # automagically generate the lit.site.cfg if we are in some kind of fresh + # build situation. This means knowing how to invoke the build system though, + # and I decided it was too much magic. We should solve this by just having + # the .cfg files generated during the configuration step. + + llvm_config = lit.util.which('llvm-config', config.environment['PATH']) + if not llvm_config: + lit.fatal('No site specific configuration available!') + + # Get the source and object roots. + llvm_src_root = lit.util.capture(['llvm-config', '--src-root']).strip() + llvm_obj_root = lit.util.capture(['llvm-config', '--obj-root']).strip() + polly_src_root = os.path.join(llvm_src_root, "tools", "polly") + polly_obj_root = os.path.join(llvm_obj_root, "tools", "polly") + + # Validate that we got a tree which points to here, using the standard + # tools/polly layout. + this_src_root = os.path.dirname(config.test_source_root) + if os.path.realpath(polly_src_root) != os.path.realpath(this_src_root): + lit.fatal('No site specific configuration available!') + + # Check that the site specific configuration exists. + site_cfg = os.path.join(polly_obj_root, 'test', 'lit.site.cfg') + if not os.path.exists(site_cfg): + lit.fatal('No site specific configuration available!') + + # Okay, that worked. Notify the user of the automagic, and reconfigure. + lit.note('using out-of-tree build at %r' % polly_obj_root) + lit.load_config(config, site_cfg) + raise SystemExit + diff --git a/polly/test/lit.site.cfg.in b/polly/test/lit.site.cfg.in new file mode 100644 index 00000000000..a525c98cfcd --- /dev/null +++ b/polly/test/lit.site.cfg.in @@ -0,0 +1,26 @@ +## Autogenerated by LLVM/Polly configuration. +# Do not edit! 
+config.llvm_src_root = "@LLVM_SOURCE_DIR@" +config.llvm_obj_root = "@LLVM_BINARY_DIR@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.llvm_libs_dir = "@LLVM_LIBS_DIR@" +config.polly_obj_root = "@POLLY_BINARY_DIR@" +config.target_triple = "@TARGET_TRIPLE@" +lit.params['build_config'] = "@POLLY_SOURCE_DIR@/test" + +# Support substitution of the tools and libs dirs with user parameters. This is +# used when we can't determine the tool dir at configuration time. +try: + config.llvm_tools_dir = config.llvm_tools_dir % lit.params + config.llvm_libs_dir = config.llvm_libs_dir % lit.params +except KeyError,e: + key, = e.args + lit.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + +config.substitutions.append(('%loadPolly', '-load ' + + config.llvm_libs_dir + '/LLVMPolly.so ')) +config.substitutions.append(('%defaultOpts', ' -basicaa -polly-prepare -polly-region-simplify -scev-aa ')) +config.substitutions.append(('%polybenchOpts', ' -O3 -loop-simplify -indvars ')) + +# Let the main config do the real work. 
+lit.load_config(config, "@POLLY_SOURCE_DIR@/test/lit.cfg") diff --git a/polly/test/polly.ll b/polly/test/polly.ll new file mode 100644 index 00000000000..c09f98332d6 --- /dev/null +++ b/polly/test/polly.ll @@ -0,0 +1,12 @@ +; RUN: opt %loadPolly %defaultOpts -polly-scops -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define void @foo() nounwind { +start: + br label %end + +end: + ret void +} + +; CHECK: foo diff --git a/polly/test/polybench/README b/polly/test/polybench/README new file mode 100755 index 00000000000..f9a56bc9d20 --- /dev/null +++ b/polly/test/polybench/README @@ -0,0 +1,87 @@ +* * * * * * * * * * +* PolyBenchs 1.0 * +* * * * * * * * * * + + +* Available benchmarks: + +::linear-algebra:: +linear-algebra/kernels: +linear-algebra/kernels/atax/2mm.c +linear-algebra/kernels/atax/3mm.c +linear-algebra/kernels/atax/atax.c +linear-algebra/kernels/bicg/bicg.c +linear-algebra/kernels/doitgen/doitgen.c +linear-algebra/kernels/gemm/gemm.c +linear-algebra/kernels/gemver/gemver.c +linear-algebra/kernels/gesummv/gesummv.c + +linear-algebra/solvers: +linear-algebra/solvers/gramschmidt/gramschmidt.c +linear-algebra/solvers/lu/lu.c +linear-algebra/solvers/ludcmp/ludcmp.c + +::datamining:: +datamining/correlation/correlation.c +datamining/covariance/covariance.c + +::stencils:: +stencils/adi/adi.c +stencils/jacobi-2d-imper/jacobi-2d-imper.c +stencils/seidel/seidel.c + + +* To compile a benchmark without any monitoring: + +$> gcc -I utilities utilities/instrument.c linear-algebra/kernels/atax/atax.c + + + +* To compile a benchmark with execution time reporting: + +$> gcc -I utilities utilities/instrument.c linear-algebra/kernels/atax/atax.c -DPOLYBENCH_TIME + + +* Available options + +They are all passed as macro definitions during compilation time. 
The +current list is: +- POLYBENCH_TIME: output execution time (gettimeofday) [default: off] +- POLYBENCH_NO_FLUSH_CACHE: don't flush the cache before calling the + timer [default: flush the cache] +- POLYBENCH_LINUX_FIFO_SCHEDULER: use FIFO real-time scheduler for the + kernel execution, the program must be run as root, under linux only, + and compiled with -lc [default: off] +- POLYBENCH_CACHE_SIZE_KB: cache size to flush, in kB [default: 8192] +- POLYBENCH_TEST_MALLOC: use malloc instead of stack allocation [default: off] +- POLYBENCH_DUMP_ARRAYS: dump all live-out arrays on stderr [default: off] + +- Nxx: override the default dataset values + + +* Collecting the execution time of all files + +$> scripts/runall.sh <machine-name> + +Note: the script runall must be run from the root directory of the archive. + + +** Specifying different dataset size for one file in particular + +- Create a compiler.opts file under the directory of the benchmark +- Specify a set of options, e.g. -DX=1024 -DY=1024 (see gemm for an example) + + +** To specify another compiler + +- Simplest is to edit scripts/runall.sh, and change the + COMPILER_COMMAND variable. One can also export the adequate + COMPILER_COMMAND variable in the shell environment. + + + +* Author +Louis-Noel Pouchet <pouchet@cse.ohio-state.edu> + +* Contributors +Uday Bondhugula <ubondhug@us.ibm.com> diff --git a/polly/test/polybench/datamining/correlation/correlation.c b/polly/test/polybench/datamining/correlation/correlation.c new file mode 100755 index 00000000000..8822f0d4e7d --- /dev/null +++ b/polly/test/polybench/datamining/correlation/correlation.c @@ -0,0 +1,165 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef M +# define M 500 +#endif +#ifndef N +# define N 500 +#endif + +/* Default data type is double. 
*/ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE float_n = 321414134.01; +DATA_TYPE eps = 0.005; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE data[M + 1][N + 1]; +DATA_TYPE symmat[M + 1][M + 1]; +DATA_TYPE stddev[M + 1]; +DATA_TYPE mean[M + 1]; +#else +DATA_TYPE** data = (DATA_TYPE**)malloc((M + 1) * sizeof(DATA_TYPE*)); +DATA_TYPE** symmat = (DATA_TYPE**)malloc((M + 1) * sizeof(DATA_TYPE*)); +DATA_TYPE* stddev = (DATA_TYPE*)malloc((M + 1) * sizeof(DATA_TYPE)); +DATA_TYPE* mean = (DATA_TYPE*)malloc((M + 1) * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i <= M; ++i) + { + data[i] = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); + symmat[i] = (DATA_TYPE*)malloc((M + 1) * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i <= M; i++) + for (j = 0; j <= N; j++) + data[i][j] = ((DATA_TYPE) i*j) / M; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i <= M; i++) + for (j = 0; j <= M; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, symmat[i][j]); + if ((i * M + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long m = M; + long n = N; +#else +void scop_func(long m, long n) { +#endif + + int i, j, j1, j2; +#pragma scop +#pragma live-out symmat + + /* Center and reduce the column vectors. */ + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + { + data[i][j] -= mean[j]; + data[i][j] /= sqrt(float_n) * stddev[j]; + } + + /* Calculate the m * m correlation matrix. 
*/ + for (j1 = 1; j1 <= m-1; j1++) + { + symmat[j1][j1] = 1.0; + for (j2 = j1+1; j2 <= m; j2++) + { + symmat[j1][j2] = 0.0; + for (i = 1; i <= n; i++) + symmat[j1][j2] += (data[i][j1] * data[i][j2]); + symmat[j2][j1] = symmat[j1][j2]; + } + } +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, j1, j2; + int m = M; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + + +#define sqrt_of_array_cell(x,j) sqrt(x[j]) + + + /* Determine mean of column vectors of input data matrix */ + for (j = 1; j <= m; j++) + { + mean[j] = 0.0; + for (i = 1; i <= n; i++) + mean[j] += data[i][j]; + mean[j] /= float_n; + } + +/* Determine standard deviations of column vectors of data matrix. */ + for (j = 1; j <= m; j++) + { + stddev[j] = 0.0; + for (i = 1; i <= n; i++) + stddev[j] += (data[i][j] - mean[j]) * (data[i][j] - mean[j]); + stddev[j] /= float_n; + stddev[j] = sqrt_of_array_cell(stddev, j); + /* The following is an inelegant but usual way to handle + near-zero std. dev. values, which below would cause a zero- + divide. */ + stddev[j] = stddev[j] <= eps ? 1.0 : stddev[j]; + } +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(m, n); +#endif + symmat[m][m] = 1.0; + + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/datamining/correlation/correlation_with_param.ll b/polly/test/polybench/datamining/correlation/correlation_with_param.ll new file mode 100644 index 00000000000..046b87f236f --- /dev/null +++ b/polly/test/polybench/datamining/correlation/correlation_with_param.ll @@ -0,0 +1,169 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './datamining/correlation/correlation_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@float_n = global double 0x41B32863F6028F5C +@eps = global double 5.000000e-03 +@data = common global [501 x [501 x double]] zeroinitializer, align 32 +@symmat = common global [501 x [501 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@mean = common global [501 x double] zeroinitializer, align 32 +@stddev = common global [501 x double] zeroinitializer, align 32 + +define void @scop_func(i64 %m, i64 %n) nounwind { +entry: + %0 = icmp slt i64 %n, 1 + %1 = icmp slt i64 %m, 1 + %or.cond = or i1 %0, %1 + br i1 %or.cond, label %bb13.preheader, label %bb2.preheader.preheader + +bb2.preheader.preheader: ; preds = %entry + br label %bb2.preheader + +bb1: ; preds = %bb2.preheader, %bb1 + %indvar52 = phi i64 [ %tmp63, %bb1 ], [ 0, %bb2.preheader ] + %tmp9 = add i64 %indvar52, 1 + %scevgep59 = 
getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp8, i64 %tmp9 + %tmp61 = add i64 %indvar52, 2 + %tmp3 = add i64 %indvar52, 1 + %scevgep60 = getelementptr [501 x double]* @mean, i64 0, i64 %tmp3 + %scevgep55 = getelementptr [501 x double]* @stddev, i64 0, i64 %tmp3 + %tmp63 = add i64 %indvar52, 1 + %2 = load double* %scevgep59, align 8 + %3 = load double* %scevgep60, align 8 + %4 = fsub double %2, %3 + store double %4, double* %scevgep59, align 8 + %5 = load double* @float_n, align 8 + %6 = tail call double @sqrt(double %5) nounwind readonly + %7 = load double* %scevgep55, align 8 + %8 = fmul double %6, %7 + %9 = fdiv double %4, %8 + store double %9, double* %scevgep59, align 8 + %10 = icmp sgt i64 %tmp61, %m + br i1 %10, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %11 = icmp sgt i64 %tmp65, %n + br i1 %11, label %bb13.preheader.loopexit, label %bb2.preheader + +bb2.preheader: ; preds = %bb2.preheader.preheader, %bb3 + %indvar56 = phi i64 [ %tmp62, %bb3 ], [ 0, %bb2.preheader.preheader ] + %tmp8 = add i64 %indvar56, 1 + %tmp65 = add i64 %indvar56, 2 + %tmp62 = add i64 %indvar56, 1 + br label %bb1 + +bb13.preheader.loopexit: ; preds = %bb3 + br label %bb13.preheader + +bb13.preheader: ; preds = %bb13.preheader.loopexit, %entry + %12 = add nsw i64 %m, -1 + %13 = icmp slt i64 %12, 1 + br i1 %13, label %return, label %bb6.preheader + +bb6.preheader: ; preds = %bb13.preheader + %tmp = add i64 %m, -1 + br label %bb6 + +bb6: ; preds = %bb6.preheader, %bb12 + %indvar14 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next15, %bb12 ] + %tmp35 = add i64 %indvar14, 3 + %tmp36 = trunc i64 %tmp35 to i32 + %tmp38 = add i64 %indvar14, 2 + %tmp39 = trunc i64 %tmp38 to i32 + %tmp46 = add i64 %indvar14, 1 + %scevgep49 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 0, i64 %tmp46 + %scevgep53 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 %tmp46, i64 0 + %tmp59 = mul i64 %indvar14, 502 + %tmp60 = add i64 %tmp59, 1 + %scevgep61 = getelementptr [501 x 
[501 x double]]* @symmat, i64 0, i64 1, i64 %tmp60 + store double 1.000000e+00, double* %scevgep61, align 8 + %14 = icmp sgt i64 %tmp38, %m + br i1 %14, label %bb12, label %bb.nph12 + +bb.nph12: ; preds = %bb6 + br i1 %0, label %bb10.us.preheader, label %bb.nph.preheader + +bb10.us.preheader: ; preds = %bb.nph12 + br label %bb10.us + +bb.nph.preheader: ; preds = %bb.nph12 + br label %bb.nph + +bb10.us: ; preds = %bb10.us.preheader, %bb10.us + %indvar = phi i32 [ %indvar.next, %bb10.us ], [ 0, %bb10.us.preheader ] + %storemerge2.us = add i32 %tmp36, %indvar + %storemerge28.us = add i32 %tmp39, %indvar + %tmp55 = sext i32 %storemerge28.us to i64 + %tmp56 = mul i64 %tmp55, 501 + %scevgep57 = getelementptr double* %scevgep49, i64 %tmp56 + %scevgep58 = getelementptr double* %scevgep53, i64 %tmp55 + store double 0.000000e+00, double* %scevgep58, align 8 + store double 0.000000e+00, double* %scevgep57, align 8 + %15 = sext i32 %storemerge2.us to i64 + %16 = icmp sgt i64 %15, %m + %indvar.next = add i32 %indvar, 1 + br i1 %16, label %bb12.loopexit1, label %bb10.us + +bb.nph: ; preds = %bb.nph.preheader, %bb10 + %indvar41 = phi i32 [ %indvar.next42, %bb10 ], [ 0, %bb.nph.preheader ] + %storemerge2 = add i32 %tmp36, %indvar41 + %storemerge28 = add i32 %tmp39, %indvar41 + %tmp50 = sext i32 %storemerge28 to i64 + %tmp51 = mul i64 %tmp50, 501 + %scevgep52 = getelementptr double* %scevgep49, i64 %tmp51 + %scevgep54 = getelementptr double* %scevgep53, i64 %tmp50 + %tmp21 = sext i32 %storemerge28 to i64 + store double 0.000000e+00, double* %scevgep54, align 8 + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph + %indvar38 = phi i64 [ 0, %bb.nph ], [ %tmp40, %bb8 ] + %17 = phi double [ 0.000000e+00, %bb.nph ], [ %21, %bb8 ] + %tmp44 = add i64 %indvar38, 1 + %scevgep47 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp44, i64 %tmp46 + %tmp48 = add i64 %indvar38, 2 + %tmp13 = add i64 %indvar38, 1 + %scevgep = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp13, i64 
%tmp21 + %tmp40 = add i64 %indvar38, 1 + %18 = load double* %scevgep47, align 8 + %19 = load double* %scevgep, align 8 + %20 = fmul double %18, %19 + %21 = fadd double %17, %20 + %22 = icmp sgt i64 %tmp48, %n + br i1 %22, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %.lcssa = phi double [ %21, %bb8 ] + store double %.lcssa, double* %scevgep54 + store double %.lcssa, double* %scevgep52, align 8 + %23 = sext i32 %storemerge2 to i64 + %24 = icmp sgt i64 %23, %m + %indvar.next42 = add i32 %indvar41, 1 + br i1 %24, label %bb12.loopexit, label %bb.nph + +bb12.loopexit: ; preds = %bb10 + br label %bb12 + +bb12.loopexit1: ; preds = %bb10.us + br label %bb12 + +bb12: ; preds = %bb12.loopexit1, %bb12.loopexit, %bb6 + %indvar.next15 = add i64 %indvar14, 1 + %exitcond = icmp eq i64 %indvar.next15, %tmp + br i1 %exitcond, label %return.loopexit, label %bb6 + +return.loopexit: ; preds = %bb12 + br label %return + +return: ; preds = %return.loopexit, %bb13.preheader + ret void +} + +declare double @sqrt(double) nounwind readonly diff --git a/polly/test/polybench/datamining/correlation/correlation_without_param.ll b/polly/test/polybench/datamining/correlation/correlation_without_param.ll new file mode 100644 index 00000000000..edde980da6f --- /dev/null +++ b/polly/test/polybench/datamining/correlation/correlation_without_param.ll @@ -0,0 +1,117 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './datamining/correlation/correlation_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { 
%struct._IO_marker*, %struct._IO_FILE*, i32 } + +@float_n = global double 0x41B32863F6028F5C +@eps = global double 5.000000e-03 +@data = common global [501 x [501 x double]] zeroinitializer, align 32 +@symmat = common global [501 x [501 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@mean = common global [501 x double] zeroinitializer, align 32 +@stddev = common global [501 x double] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph33.bb.nph33.split_crit_edge: + br label %bb2.preheader + +bb1: ; preds = %bb2.preheader, %bb1 + %indvar45 = phi i64 [ %tmp57, %bb1 ], [ 0, %bb2.preheader ] + %tmp51 = add i64 %indvar45, 1 + %scevgep53 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp50, i64 %tmp51 + %tmp44 = add i64 %indvar45, 1 + %scevgep54 = getelementptr [501 x double]* @mean, i64 0, i64 %tmp44 + %scevgep49 = getelementptr [501 x double]* @stddev, i64 0, i64 %tmp44 + %tmp57 = add i64 %indvar45, 1 + %0 = load double* %scevgep53, align 8 + %1 = load double* %scevgep54, align 8 + %2 = fsub double %0, %1 + store double %2, double* %scevgep53, align 8 + %3 = load double* @float_n, align 8 + %4 = tail call double @sqrt(double %3) nounwind readonly + %5 = load double* %scevgep49, align 8 + %6 = fmul double %4, %5 + %7 = fdiv double %2, %6 + store double %7, double* %scevgep53, align 8 + %exitcond43 = icmp eq i64 %tmp57, 500 + br i1 %exitcond43, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %tmp56 = add i64 %indvar50, 1 + %exitcond49 = icmp eq i64 %tmp56, 500 + br i1 %exitcond49, label %bb6.preheader, label %bb2.preheader + +bb6.preheader: ; preds = %bb3 + br label %bb6 + +bb2.preheader: ; preds = %bb3, %bb.nph33.bb.nph33.split_crit_edge + %indvar50 = phi i64 [ 0, %bb.nph33.bb.nph33.split_crit_edge ], [ %tmp56, %bb3 ] + %tmp50 = add i64 %indvar50, 1 + br label %bb1 + +bb6: ; preds = %bb6.preheader, %bb12 + %indvar3 = phi i64 [ 0, %bb6.preheader 
], [ %indvar.next, %bb12 ] + %tmp25 = mul i64 %indvar3, 502 + %tmp26 = add i64 %tmp25, 2 + %tmp30 = add i64 %tmp25, 1 + %tmp33 = add i64 %indvar3, 2 + %tmp36 = mul i64 %indvar3, -1 + %tmp12 = add i64 %tmp36, 499 + %tmp38 = add i64 %indvar3, 1 + %scevgep42 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 1, i64 %tmp30 + store double 1.000000e+00, double* %scevgep42, align 8 + br i1 false, label %bb12, label %bb.nph12.bb.nph12.split_crit_edge + +bb.nph12.bb.nph12.split_crit_edge: ; preds = %bb6 + br label %bb.nph + +bb.nph: ; preds = %bb10, %bb.nph12.bb.nph12.split_crit_edge + %indvar6 = phi i64 [ %indvar.next7, %bb10 ], [ 0, %bb.nph12.bb.nph12.split_crit_edge ] + %tmp27 = add i64 %tmp26, %indvar6 + %scevgep23 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 1, i64 %tmp27 + %tmp29 = add i64 %indvar6, 2 + %scevgep20 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 %tmp29, i64 %tmp30 + %tmp34 = add i64 %tmp33, %indvar6 + store double 0.000000e+00, double* %scevgep23, align 8 + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp, %bb8 ] + %8 = phi double [ 0.000000e+00, %bb.nph ], [ %12, %bb8 ] + %tmp32 = add i64 %indvar, 1 + %scevgep = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp32, i64 %tmp34 + %scevgep41 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp32, i64 %tmp38 + %tmp = add i64 %indvar, 1 + %9 = load double* %scevgep41, align 8 + %10 = load double* %scevgep, align 8 + %11 = fmul double %9, %10 + %12 = fadd double %8, %11 + %exitcond1 = icmp eq i64 %tmp, 500 + br i1 %exitcond1, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %.lcssa = phi double [ %12, %bb8 ] + store double %.lcssa, double* %scevgep23 + store double %.lcssa, double* %scevgep20, align 8 + %indvar.next7 = add i64 %indvar6, 1 + %exitcond = icmp eq i64 %indvar.next7, %tmp12 + br i1 %exitcond, label %bb12.loopexit, label %bb.nph + +bb12.loopexit: ; preds = %bb10 + br label %bb12 + +bb12: ; preds = 
%bb12.loopexit, %bb6 + %indvar.next = add i64 %indvar3, 1 + %exitcond24 = icmp eq i64 %indvar.next, 499 + br i1 %exitcond24, label %return, label %bb6 + +return: ; preds = %bb12 + ret void +} + +declare double @sqrt(double) nounwind readonly diff --git a/polly/test/polybench/datamining/covariance/covariance.c b/polly/test/polybench/datamining/covariance/covariance.c new file mode 100755 index 00000000000..b565d7d4e95 --- /dev/null +++ b/polly/test/polybench/datamining/covariance/covariance.c @@ -0,0 +1,138 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef M +# define M 500 +#endif +#ifndef N +# define N 500 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE float_n = 321414134.01; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE data[M + 1][N + 1]; +DATA_TYPE symmat[M + 1][M + 1]; +DATA_TYPE mean[M + 1]; +#else +DATA_TYPE** data = (DATA_TYPE**)malloc((M + 1) * sizeof(DATA_TYPE*)); +DATA_TYPE** symmat = (DATA_TYPE**)malloc((M + 1) * sizeof(DATA_TYPE*)); +DATA_TYPE* mean = (DATA_TYPE*)malloc((M + 1) * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i <= M; ++i) + { + data[i] = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); + symmat[i] = (DATA_TYPE*)malloc((M + 1) * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i <= M; i++) + for (j = 0; j <= N; j++) + data[i][j] = ((DATA_TYPE) i*j) / M; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i <= M; i++) + for (j = 0; j <= M; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, symmat[i][j]); + if ((i * M + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + int m = M; + int n = N; +#else +void scop_func(long m, long n) { +#endif + long i, j, j1, j2; +#pragma scop +#pragma live-out symmat + + /* Determine mean of column vectors of input data matrix */ + for (j = 1; j <= m; j++) + { + mean[j] = 0.0; + for (i = 1; i <= n; i++) + mean[j] += data[i][j]; + mean[j] /= float_n; + } + + /* Center the column vectors. */ + for (i = 1; i <= n; i++) + for (j = 1; j <= m; j++) + data[i][j] -= mean[j]; + + /* Calculate the m * m covariance matrix. */ + for (j1 = 1; j1 <= m; j1++) + for (j2 = j1; j2 <= m; j2++) + { + symmat[j1][j2] = 0.0; + for (i = 1; i <= n; i++) + symmat[j1][j2] += data[i][j1] * data[i][j2]; + symmat[j2][j1] = symmat[j1][j2]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, j1, j2; + int m = M; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(m, n); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/datamining/covariance/covariance_with_param.ll b/polly/test/polybench/datamining/covariance/covariance_with_param.ll new file mode 100644 index 00000000000..df547013a0c --- /dev/null +++ b/polly/test/polybench/datamining/covariance/covariance_with_param.ll @@ -0,0 +1,203 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './datamining/covariance/covariance_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@float_n = global double 0x41B32863F6028F5C +@data = common global [501 x [501 x double]] zeroinitializer, align 32 +@symmat = common global [501 x [501 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@mean = common global [501 x double] zeroinitializer, align 32 + +define void @scop_func(i64 %m, i64 %n) nounwind { +entry: + %0 = icmp slt i64 %m, 1 + br i1 %0, label %bb10.preheader, label %bb.nph44 + +bb.nph44: ; preds = %entry + %1 = icmp slt i64 %n, 1 + %2 = load double* @float_n, align 8 + br i1 %1, label %bb3.us.preheader, label %bb.nph36.preheader + +bb3.us.preheader: ; preds = %bb.nph44 + br label %bb3.us + +bb.nph36.preheader: ; preds = %bb.nph44 + br label %bb.nph36 + +bb3.us: ; preds = %bb3.us.preheader, %bb3.us + %indvar = phi i64 [ %tmp, %bb3.us ], [ 0, %bb3.us.preheader ] + %tmp45 = 
add i64 %indvar, 2 + %tmp13 = add i64 %indvar, 1 + %scevgep = getelementptr [501 x double]* @mean, i64 0, i64 %tmp13 + %tmp = add i64 %indvar, 1 + %3 = fdiv double 0.000000e+00, %2 + store double %3, double* %scevgep, align 8 + %4 = icmp sgt i64 %tmp45, %m + br i1 %4, label %bb10.preheader.loopexit1, label %bb3.us + +bb.nph36: ; preds = %bb.nph36.preheader, %bb3 + %indvar94 = phi i64 [ %tmp100, %bb3 ], [ 0, %bb.nph36.preheader ] + %tmp8 = add i64 %indvar94, 1 + %tmp102 = add i64 %indvar94, 2 + %scevgep103 = getelementptr [501 x double]* @mean, i64 0, i64 %tmp8 + %tmp100 = add i64 %indvar94, 1 + store double 0.000000e+00, double* %scevgep103, align 8 + br label %bb1 + +bb1: ; preds = %bb1, %bb.nph36 + %indvar91 = phi i64 [ 0, %bb.nph36 ], [ %tmp99, %bb1 ] + %5 = phi double [ 0.000000e+00, %bb.nph36 ], [ %7, %bb1 ] + %tmp7 = add i64 %indvar91, 1 + %scevgep97 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp7, i64 %tmp8 + %tmp98 = add i64 %indvar91, 2 + %tmp99 = add i64 %indvar91, 1 + %6 = load double* %scevgep97, align 8 + %7 = fadd double %5, %6 + %8 = icmp sgt i64 %tmp98, %n + br i1 %8, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %.lcssa = phi double [ %7, %bb1 ] + %9 = fdiv double %.lcssa, %2 + store double %9, double* %scevgep103, align 8 + %10 = icmp sgt i64 %tmp102, %m + br i1 %10, label %bb10.preheader.loopexit, label %bb.nph36 + +bb10.preheader.loopexit: ; preds = %bb3 + br label %bb10.preheader + +bb10.preheader.loopexit1: ; preds = %bb3.us + br label %bb10.preheader + +bb10.preheader: ; preds = %bb10.preheader.loopexit1, %bb10.preheader.loopexit, %entry + %11 = icmp slt i64 %n, 1 + br i1 %11, label %bb19.preheader, label %bb.nph33 + +bb7: ; preds = %bb8.preheader, %bb7 + %indvar77 = phi i64 [ %tmp87, %bb7 ], [ 0, %bb8.preheader ] + %tmp21 = add i64 %indvar77, 1 + %scevgep83 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp20, i64 %tmp21 + %tmp85 = add i64 %indvar77, 2 + %tmp16 = add i64 %indvar77, 1 + %scevgep84 = getelementptr 
[501 x double]* @mean, i64 0, i64 %tmp16 + %tmp87 = add i64 %indvar77, 1 + %12 = load double* %scevgep83, align 8 + %13 = load double* %scevgep84, align 8 + %14 = fsub double %12, %13 + store double %14, double* %scevgep83, align 8 + %15 = icmp sgt i64 %tmp85, %m + br i1 %15, label %bb9, label %bb7 + +bb9: ; preds = %bb7 + %16 = icmp sgt i64 %tmp89, %n + br i1 %16, label %bb19.preheader.loopexit, label %bb8.preheader + +bb.nph33: ; preds = %bb10.preheader + br i1 %0, label %return, label %bb8.preheader.preheader + +bb8.preheader.preheader: ; preds = %bb.nph33 + br label %bb8.preheader + +bb8.preheader: ; preds = %bb8.preheader.preheader, %bb9 + %indvar79 = phi i64 [ %tmp86, %bb9 ], [ 0, %bb8.preheader.preheader ] + %tmp20 = add i64 %indvar79, 1 + %tmp89 = add i64 %indvar79, 2 + %tmp86 = add i64 %indvar79, 1 + br label %bb7 + +bb19.preheader.loopexit: ; preds = %bb9 + br label %bb19.preheader + +bb19.preheader: ; preds = %bb19.preheader.loopexit, %bb10.preheader + br i1 %0, label %return, label %bb17.preheader.preheader + +bb17.preheader.preheader: ; preds = %bb19.preheader + br label %bb17.preheader + +bb.nph13: ; preds = %bb17.preheader + br i1 %11, label %bb16.us.preheader, label %bb.nph13.bb.nph13.split_crit_edge + +bb16.us.preheader: ; preds = %bb.nph13 + br label %bb16.us + +bb.nph13.bb.nph13.split_crit_edge: ; preds = %bb.nph13 + br label %bb.nph + +bb16.us: ; preds = %bb16.us.preheader, %bb16.us + %indvar48 = phi i64 [ %indvar.next49, %bb16.us ], [ 0, %bb16.us.preheader ] + %tmp57 = add i64 %tmp56, %indvar48 + %scevgep57 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 1, i64 %tmp57 + %tmp59 = add i64 %indvar48, 1 + %scevgep52 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 %tmp59, i64 %tmp56 + %tmp54 = add i64 %tmp61, %indvar48 + store double 0.000000e+00, double* %scevgep57, align 8 + store double 0.000000e+00, double* %scevgep52, align 8 + %17 = icmp sgt i64 %tmp54, %m + %indvar.next49 = add i64 %indvar48, 1 + br i1 %17, label 
%bb18.loopexit2, label %bb16.us + +bb.nph: ; preds = %bb16, %bb.nph13.bb.nph13.split_crit_edge + %indvar62 = phi i64 [ 0, %bb.nph13.bb.nph13.split_crit_edge ], [ %indvar.next63, %bb16 ] + %tmp72 = add i64 %tmp61, %indvar62 + %tmp64 = add i64 %indvar62, 1 + %scevgep74 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 %tmp64, i64 %tmp56 + %tmp69 = add i64 %tmp56, %indvar62 + %scevgep76 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 1, i64 %tmp69 + %tmp74 = add i64 %storemerge214, %indvar62 + store double 0.000000e+00, double* %scevgep76, align 8 + br label %bb14 + +bb14: ; preds = %bb14, %bb.nph + %indvar59 = phi i64 [ 0, %bb.nph ], [ %tmp68, %bb14 ] + %18 = phi double [ 0.000000e+00, %bb.nph ], [ %22, %bb14 ] + %tmp71 = add i64 %indvar59, 1 + %scevgep65 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp71, i64 %tmp74 + %scevgep66 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp71, i64 %storemerge214 + %tmp67 = add i64 %indvar59, 2 + %tmp68 = add i64 %indvar59, 1 + %19 = load double* %scevgep66, align 8 + %20 = load double* %scevgep65, align 8 + %21 = fmul double %19, %20 + %22 = fadd double %18, %21 + %23 = icmp sgt i64 %tmp67, %n + br i1 %23, label %bb16, label %bb14 + +bb16: ; preds = %bb14 + %.lcssa24 = phi double [ %22, %bb14 ] + store double %.lcssa24, double* %scevgep76 + store double %.lcssa24, double* %scevgep74, align 8 + %24 = icmp sgt i64 %tmp72, %m + %indvar.next63 = add i64 %indvar62, 1 + br i1 %24, label %bb18.loopexit, label %bb.nph + +bb18.loopexit: ; preds = %bb16 + br label %bb18 + +bb18.loopexit2: ; preds = %bb16.us + br label %bb18 + +bb18: ; preds = %bb18.loopexit2, %bb18.loopexit, %bb17.preheader + %indvar.next = add i64 %indvar27, 1 + %exitcond = icmp eq i64 %indvar.next, %m + br i1 %exitcond, label %return.loopexit, label %bb17.preheader + +bb17.preheader: ; preds = %bb17.preheader.preheader, %bb18 + %indvar27 = phi i64 [ 0, %bb17.preheader.preheader ], [ %indvar.next, %bb18 ] + %tmp55 = mul 
i64 %indvar27, 502 + %tmp56 = add i64 %tmp55, 1 + %tmp61 = add i64 %indvar27, 2 + %storemerge214 = add i64 %indvar27, 1 + br i1 false, label %bb18, label %bb.nph13 + +return.loopexit: ; preds = %bb18 + br label %return + +return: ; preds = %return.loopexit, %bb19.preheader, %bb.nph33 + ret void +} diff --git a/polly/test/polybench/datamining/covariance/covariance_without_param.ll b/polly/test/polybench/datamining/covariance/covariance_without_param.ll new file mode 100644 index 00000000000..a0c1f51d1ef --- /dev/null +++ b/polly/test/polybench/datamining/covariance/covariance_without_param.ll @@ -0,0 +1,135 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -polly-cloog -analyze %s | FileCheck %s +; region-simplify makes Polly fail to detect the canonical induction variable. +; XFAIL:* + +; ModuleID = './datamining/covariance/covariance_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@float_n = global double 0x41B32863F6028F5C +@data = common global [501 x [501 x double]] zeroinitializer, align 32 +@symmat = common global [501 x [501 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@mean = common global [501 x double] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph44.bb.nph44.split_crit_edge: + %0 = load double* @float_n, align 8 + br label %bb.nph36 + +bb.nph36: ; preds = %bb3, %bb.nph44.bb.nph44.split_crit_edge + %indvar77 = phi i64 [ 0, %bb.nph44.bb.nph44.split_crit_edge ], [ %tmp83, 
%bb3 ] + %tmp48 = add i64 %indvar77, 1 + %scevgep85 = getelementptr [501 x double]* @mean, i64 0, i64 %tmp48 + %tmp83 = add i64 %indvar77, 1 + store double 0.000000e+00, double* %scevgep85, align 8 + br label %bb1 + +bb1: ; preds = %bb1, %bb.nph36 + %indvar73 = phi i64 [ 0, %bb.nph36 ], [ %tmp82, %bb1 ] + %1 = phi double [ 0.000000e+00, %bb.nph36 ], [ %3, %bb1 ] + %tmp47 = add i64 %indvar73, 1 + %scevgep80 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp47, i64 %tmp48 + %tmp82 = add i64 %indvar73, 1 + %2 = load double* %scevgep80, align 8 + %3 = fadd double %1, %2 + %exitcond42 = icmp eq i64 %tmp82, 500 + br i1 %exitcond42, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %.lcssa41 = phi double [ %3, %bb1 ] + %4 = fdiv double %.lcssa41, %0 + store double %4, double* %scevgep85, align 8 + %exitcond46 = icmp eq i64 %tmp83, 500 + br i1 %exitcond46, label %bb8.preheader.preheader, label %bb.nph36 + +bb8.preheader.preheader: ; preds = %bb3 + br label %bb8.preheader + +bb7: ; preds = %bb8.preheader, %bb7 + %indvar59 = phi i64 [ %tmp70, %bb7 ], [ 0, %bb8.preheader ] + %tmp39 = add i64 %indvar59, 1 + %scevgep66 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp38, i64 %tmp39 + %tmp = add i64 %indvar59, 1 + %scevgep67 = getelementptr [501 x double]* @mean, i64 0, i64 %tmp + %tmp70 = add i64 %indvar59, 1 + %5 = load double* %scevgep66, align 8 + %6 = load double* %scevgep67, align 8 + %7 = fsub double %5, %6 + store double %7, double* %scevgep66, align 8 + %exitcond33 = icmp eq i64 %tmp70, 500 + br i1 %exitcond33, label %bb9, label %bb7 + +bb9: ; preds = %bb7 + %tmp69 = add i64 %indvar62, 1 + %exitcond37 = icmp eq i64 %tmp69, 500 + br i1 %exitcond37, label %bb17.preheader.preheader, label %bb8.preheader + +bb17.preheader.preheader: ; preds = %bb9 + br label %bb17.preheader + +bb8.preheader: ; preds = %bb8.preheader.preheader, %bb9 + %indvar62 = phi i64 [ %tmp69, %bb9 ], [ 0, %bb8.preheader.preheader ] + %tmp38 = add i64 %indvar62, 1 + br label %bb7 + 
+bb.nph13.bb.nph13.split_crit_edge: ; preds = %bb17.preheader + br label %bb.nph + +bb.nph: ; preds = %bb16, %bb.nph13.bb.nph13.split_crit_edge + %indvar46 = phi i64 [ 0, %bb.nph13.bb.nph13.split_crit_edge ], [ %indvar.next47, %bb16 ] + %tmp20 = add i64 %indvar46, 1 + %scevgep56 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 %tmp20, i64 %tmp22 + %tmp24 = add i64 %tmp22, %indvar46 + %scevgep58 = getelementptr [501 x [501 x double]]* @symmat, i64 0, i64 1, i64 %tmp24 + %tmp28 = add i64 %storemerge214, %indvar46 + store double 0.000000e+00, double* %scevgep58, align 8 + br label %bb14 + +bb14: ; preds = %bb14, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp50, %bb14 ] + %8 = phi double [ 0.000000e+00, %bb.nph ], [ %12, %bb14 ] + %tmp26 = add i64 %indvar, 1 + %scevgep = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp26, i64 %tmp28 + %scevgep49 = getelementptr [501 x [501 x double]]* @data, i64 0, i64 %tmp26, i64 %storemerge214 + %tmp50 = add i64 %indvar, 1 + %9 = load double* %scevgep49, align 8 + %10 = load double* %scevgep, align 8 + %11 = fmul double %9, %10 + %12 = fadd double %8, %11 + %exitcond1 = icmp eq i64 %tmp50, 500 + br i1 %exitcond1, label %bb16, label %bb14 + +bb16: ; preds = %bb14 + %.lcssa = phi double [ %12, %bb14 ] + store double %.lcssa, double* %scevgep58 + store double %.lcssa, double* %scevgep56, align 8 + %indvar.next47 = add i64 %indvar46, 1 + %exitcond = icmp eq i64 %indvar.next47, %tmp8 + br i1 %exitcond, label %bb18.loopexit, label %bb.nph + +bb18.loopexit: ; preds = %bb16 + br label %bb18 + +bb18: ; preds = %bb18.loopexit, %bb17.preheader + %indvar.next = add i64 %indvar2, 1 + %exitcond19 = icmp eq i64 %indvar.next, 500 + br i1 %exitcond19, label %return, label %bb17.preheader + +bb17.preheader: ; preds = %bb17.preheader.preheader, %bb18 + %indvar2 = phi i64 [ 0, %bb17.preheader.preheader ], [ %indvar.next, %bb18 ] + %tmp21 = mul i64 %indvar2, 502 + %tmp22 = add i64 %tmp21, 1 + %storemerge214 = add i64 %indvar2, 
1 + %tmp30 = mul i64 %indvar2, -1 + %tmp8 = add i64 %tmp30, 500 + br i1 false, label %bb18, label %bb.nph13.bb.nph13.split_crit_edge + +return: ; preds = %bb18 + ret void +} +; CHECK: for region: 'bb.nph36 => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/2mm/2mm.c b/polly/test/polybench/linear-algebra/kernels/2mm/2mm.c new file mode 100755 index 00000000000..a23ff8fd3b9 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/2mm/2mm.c @@ -0,0 +1,168 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef NI +# define NI 512 +#endif +#ifndef NJ +# define NJ 512 +#endif +#ifndef NK +# define NK 512 +#endif +#ifndef NL +# define NL 512 +#endif + + +/* Default data type is double (dgemm). */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE alpha1; +DATA_TYPE beta1; +DATA_TYPE alpha2; +DATA_TYPE beta2; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE C[NI][NJ]; +DATA_TYPE A[NI][NK]; +DATA_TYPE B[NK][NJ]; +DATA_TYPE D[NJ][NL]; +DATA_TYPE E[NI][NL]; +#else +DATA_TYPE** C = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** A = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(NK * sizeof(DATA_TYPE*)); +DATA_TYPE** D = (DATA_TYPE**)malloc(NJ * sizeof(DATA_TYPE*)); +DATA_TYPE** E = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < NI; ++i) + { + C[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); + A[i] = (DATA_TYPE*)malloc(NK * sizeof(DATA_TYPE)); + E[i] = (DATA_TYPE*)malloc(NL * sizeof(DATA_TYPE)); + } + for (i = 0; i < NK; ++i) + B[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); + for (i = 0; i < NJ; ++i) + D[i] = (DATA_TYPE*)malloc(NL * sizeof(DATA_TYPE)); +} +#endif + + +inline +void init_array() +{ + int i, j; + + alpha1 = 32412; + beta1 = 2123; + alpha2 = 132412; + beta2 = 92123; 
+ for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i][j] = ((DATA_TYPE) i*j)/NI; + for (i = 0; i < NK; i++) + for (j = 0; j < NJ; j++) + B[i][j] = ((DATA_TYPE) i*j + 1)/NJ; + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + C[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NJ; i++) + for (j = 0; j < NL; j++) + D[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NI; i++) + for (j = 0; j < NL; j++) + E[i][j] = ((DATA_TYPE) i*j + 2)/NJ; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NI; i++) { + for (j = 0; j < NL; j++) { + fprintf(stderr, "%0.2lf ", E[i][j]); + if ((i * NI + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long ni = NI; + long nj = NJ; + long nk = NK; + long nl = NL; +#else +void scop_func(long ni, long nj, long nk, long nl) { +#endif + long i, j, k; +#pragma scop +#pragma live-out E + + /* E := A*B*C */ + for (i = 0; i < ni; i++) + for (j = 0; j < nj; j++) + { + C[i][j] = 0; + for (k = 0; k < nk; ++k) + C[i][j] += A[i][k] * B[k][j]; + } + for (i = 0; i < ni; i++) + for (j = 0; j < nl; j++) + { + E[i][j] = 0; + for (k = 0; k < nj; ++k) + E[i][j] += C[i][k] * D[k][j]; + } + + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int ni = NI; + int nj = NJ; + int nk = NK; + int nl = NL; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(ni, nj, nk, nl); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/2mm/2mm_with_param.ll b/polly/test/polybench/linear-algebra/kernels/2mm/2mm_with_param.ll new file mode 100644 index 00000000000..646ed3e7cf5 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/2mm/2mm_with_param.ll @@ -0,0 +1,164 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s| FileCheck %s +; ModuleID = './linear-algebra/kernels/2mm/2mm_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha1 = common global double 0.000000e+00 +@beta1 = common global double 0.000000e+00 +@alpha2 = common global double 0.000000e+00 +@beta2 = common global double 0.000000e+00 +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@D = common global [512 x [512 x double]] zeroinitializer, align 32 +@E = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %ni, i64 %nj, i64 %nk, i64 %nl) nounwind { +entry: + %0 = icmp sgt i64 %ni, 0 + br i1 %0, label %bb.nph50, label %return + +bb.nph35: ; preds = %bb.nph35.preheader, %bb6 + %indvar17 = phi i64 [ 0, %bb.nph35.preheader ], [ %indvar.next18, %bb6 ] + br i1 %8, label %bb.nph27.us.preheader, label 
%bb4.preheader + +bb.nph27.us.preheader: ; preds = %bb.nph35 + br label %bb.nph27.us + +bb4.preheader: ; preds = %bb.nph35 + br label %bb4 + +bb4.us: ; preds = %bb2.us + %.lcssa20 = phi double [ %5, %bb2.us ] + store double %.lcssa20, double* %scevgep64 + %1 = add nsw i64 %storemerge431.us, 1 + %exitcond24 = icmp eq i64 %1, %nj + br i1 %exitcond24, label %bb6.loopexit2, label %bb.nph27.us + +bb2.us: ; preds = %bb.nph27.us, %bb2.us + %.tmp.029.us = phi double [ 0.000000e+00, %bb.nph27.us ], [ %5, %bb2.us ] + %storemerge526.us = phi i64 [ 0, %bb.nph27.us ], [ %6, %bb2.us ] + %scevgep61 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %indvar17, i64 %storemerge526.us + %scevgep60 = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge526.us, i64 %storemerge431.us + %2 = load double* %scevgep61, align 8 + %3 = load double* %scevgep60, align 8 + %4 = fmul double %2, %3 + %5 = fadd double %.tmp.029.us, %4 + %6 = add nsw i64 %storemerge526.us, 1 + %exitcond21 = icmp eq i64 %6, %nk + br i1 %exitcond21, label %bb4.us, label %bb2.us + +bb.nph27.us: ; preds = %bb.nph27.us.preheader, %bb4.us + %storemerge431.us = phi i64 [ %1, %bb4.us ], [ 0, %bb.nph27.us.preheader ] + %scevgep64 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar17, i64 %storemerge431.us + store double 0.000000e+00, double* %scevgep64, align 8 + br label %bb2.us + +bb4: ; preds = %bb4.preheader, %bb4 + %indvar67 = phi i64 [ %indvar.next68, %bb4 ], [ 0, %bb4.preheader ] + %scevgep72 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar17, i64 %indvar67 + store double 0.000000e+00, double* %scevgep72, align 8 + %indvar.next68 = add i64 %indvar67, 1 + %exitcond16 = icmp eq i64 %indvar.next68, %nj + br i1 %exitcond16, label %bb6.loopexit, label %bb4 + +bb6.loopexit: ; preds = %bb4 + br label %bb6 + +bb6.loopexit2: ; preds = %bb4.us + br label %bb6 + +bb6: ; preds = %bb6.loopexit2, %bb6.loopexit + %indvar.next18 = add i64 %indvar17, 1 + %exitcond27 = icmp ne i64 %indvar.next18, 
%ni + br i1 %exitcond27, label %bb.nph35, label %bb16.preheader.loopexit + +bb.nph50: ; preds = %entry + %7 = icmp sgt i64 %nj, 0 + %8 = icmp sgt i64 %nk, 0 + br i1 %7, label %bb.nph35.preheader, label %bb16.preheader + +bb.nph35.preheader: ; preds = %bb.nph50 + br label %bb.nph35 + +bb16.preheader.loopexit: ; preds = %bb6 + br label %bb16.preheader + +bb16.preheader: ; preds = %bb16.preheader.loopexit, %bb.nph50 + br i1 %0, label %bb.nph25, label %return + +bb.nph11: ; preds = %bb.nph11.preheader, %bb15 + %indvar4 = phi i64 [ 0, %bb.nph11.preheader ], [ %indvar.next5, %bb15 ] + br i1 %16, label %bb.nph.us.preheader, label %bb13.preheader + +bb.nph.us.preheader: ; preds = %bb.nph11 + br label %bb.nph.us + +bb13.preheader: ; preds = %bb.nph11 + br label %bb13 + +bb13.us: ; preds = %bb11.us + %.lcssa = phi double [ %13, %bb11.us ] + store double %.lcssa, double* %scevgep54 + %9 = add nsw i64 %storemerge27.us, 1 + %exitcond = icmp eq i64 %9, %nl + br i1 %exitcond, label %bb15.loopexit1, label %bb.nph.us + +bb11.us: ; preds = %bb.nph.us, %bb11.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %13, %bb11.us ] + %storemerge36.us = phi i64 [ 0, %bb.nph.us ], [ %14, %bb11.us ] + %scevgep51 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar4, i64 %storemerge36.us + %scevgep = getelementptr [512 x [512 x double]]* @D, i64 0, i64 %storemerge36.us, i64 %storemerge27.us + %10 = load double* %scevgep51, align 8 + %11 = load double* %scevgep, align 8 + %12 = fmul double %10, %11 + %13 = fadd double %.tmp.0.us, %12 + %14 = add nsw i64 %storemerge36.us, 1 + %exitcond7 = icmp eq i64 %14, %nj + br i1 %exitcond7, label %bb13.us, label %bb11.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb13.us + %storemerge27.us = phi i64 [ %9, %bb13.us ], [ 0, %bb.nph.us.preheader ] + %scevgep54 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %indvar4, i64 %storemerge27.us + store double 0.000000e+00, double* %scevgep54, align 8 + br label %bb11.us + +bb13: ; preds = 
%bb13.preheader, %bb13 + %indvar = phi i64 [ %indvar.next, %bb13 ], [ 0, %bb13.preheader ] + %scevgep57 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %indvar4, i64 %indvar + store double 0.000000e+00, double* %scevgep57, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond3 = icmp eq i64 %indvar.next, %nl + br i1 %exitcond3, label %bb15.loopexit, label %bb13 + +bb15.loopexit: ; preds = %bb13 + br label %bb15 + +bb15.loopexit1: ; preds = %bb13.us + br label %bb15 + +bb15: ; preds = %bb15.loopexit1, %bb15.loopexit + %indvar.next5 = add i64 %indvar4, 1 + %exitcond12 = icmp ne i64 %indvar.next5, %ni + br i1 %exitcond12, label %bb.nph11, label %return.loopexit + +bb.nph25: ; preds = %bb16.preheader + %15 = icmp sgt i64 %nl, 0 + %16 = icmp sgt i64 %nj, 0 + br i1 %15, label %bb.nph11.preheader, label %return + +bb.nph11.preheader: ; preds = %bb.nph25 + br label %bb.nph11 + +return.loopexit: ; preds = %bb15 + br label %return + +return: ; preds = %return.loopexit, %bb.nph25, %bb16.preheader, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/2mm/2mm_without_param.ll b/polly/test/polybench/linear-algebra/kernels/2mm/2mm_without_param.ll new file mode 100644 index 00000000000..eef2b634710 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/2mm/2mm_without_param.ll @@ -0,0 +1,101 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/2mm/2mm_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } 
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha1 = common global double 0.000000e+00 +@beta1 = common global double 0.000000e+00 +@alpha2 = common global double 0.000000e+00 +@beta2 = common global double 0.000000e+00 +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@D = common global [512 x [512 x double]] zeroinitializer, align 32 +@E = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph50.bb.nph50.split_crit_edge: + br label %bb5.preheader + +bb4.us: ; preds = %bb2.us + %.lcssa9 = phi double [ %4, %bb2.us ] + store double %.lcssa9, double* %scevgep61 + %0 = add nsw i64 %storemerge431.us, 1 + %exitcond13 = icmp eq i64 %0, 512 + br i1 %exitcond13, label %bb6, label %bb.nph27.us + +bb2.us: ; preds = %bb.nph27.us, %bb2.us + %.tmp.029.us = phi double [ 0.000000e+00, %bb.nph27.us ], [ %4, %bb2.us ] + %storemerge526.us = phi i64 [ 0, %bb.nph27.us ], [ %5, %bb2.us ] + %scevgep58 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %storemerge37, i64 %storemerge526.us + %scevgep57 = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge526.us, i64 %storemerge431.us + %1 = load double* %scevgep58, align 8 + %2 = load double* %scevgep57, align 8 + %3 = fmul double %1, %2 + %4 = fadd double %.tmp.029.us, %3 + %5 = add nsw i64 %storemerge526.us, 1 + %exitcond10 = icmp eq i64 %5, 512 + br i1 %exitcond10, label %bb4.us, label %bb2.us + +bb.nph27.us: ; preds = %bb5.preheader, %bb4.us + %storemerge431.us = phi i64 [ %0, %bb4.us ], [ 0, %bb5.preheader ] + %scevgep61 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %storemerge37, i64 %storemerge431.us + store double 0.000000e+00, double* %scevgep61, align 8 + br 
label %bb2.us + +bb6: ; preds = %bb4.us + %6 = add nsw i64 %storemerge37, 1 + %exitcond16 = icmp ne i64 %6, 512 + br i1 %exitcond16, label %bb5.preheader, label %bb14.preheader.preheader + +bb14.preheader.preheader: ; preds = %bb6 + br label %bb14.preheader + +bb5.preheader: ; preds = %bb6, %bb.nph50.bb.nph50.split_crit_edge + %storemerge37 = phi i64 [ 0, %bb.nph50.bb.nph50.split_crit_edge ], [ %6, %bb6 ] + br label %bb.nph27.us + +bb13.us: ; preds = %bb11.us + %.lcssa = phi double [ %11, %bb11.us ] + store double %.lcssa, double* %scevgep54 + %7 = add nsw i64 %storemerge27.us, 1 + %exitcond = icmp eq i64 %7, 512 + br i1 %exitcond, label %bb15, label %bb.nph.us + +bb11.us: ; preds = %bb.nph.us, %bb11.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %11, %bb11.us ] + %storemerge36.us = phi i64 [ 0, %bb.nph.us ], [ %12, %bb11.us ] + %scevgep51 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %storemerge112, i64 %storemerge36.us + %scevgep = getelementptr [512 x [512 x double]]* @D, i64 0, i64 %storemerge36.us, i64 %storemerge27.us + %8 = load double* %scevgep51, align 8 + %9 = load double* %scevgep, align 8 + %10 = fmul double %8, %9 + %11 = fadd double %.tmp.0.us, %10 + %12 = add nsw i64 %storemerge36.us, 1 + %exitcond1 = icmp eq i64 %12, 512 + br i1 %exitcond1, label %bb13.us, label %bb11.us + +bb.nph.us: ; preds = %bb14.preheader, %bb13.us + %storemerge27.us = phi i64 [ %7, %bb13.us ], [ 0, %bb14.preheader ] + %scevgep54 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %storemerge112, i64 %storemerge27.us + store double 0.000000e+00, double* %scevgep54, align 8 + br label %bb11.us + +bb15: ; preds = %bb13.us + %13 = add nsw i64 %storemerge112, 1 + %exitcond6 = icmp ne i64 %13, 512 + br i1 %exitcond6, label %bb14.preheader, label %return + +bb14.preheader: ; preds = %bb14.preheader.preheader, %bb15 + %storemerge112 = phi i64 [ %13, %bb15 ], [ 0, %bb14.preheader.preheader ] + br label %bb.nph.us + +return: ; preds = %bb15 + ret void +} +; 
CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/linear-algebra/kernels/2mm/compiler.opts b/polly/test/polybench/linear-algebra/kernels/2mm/compiler.opts new file mode 100755 index 00000000000..483defa8010 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/2mm/compiler.opts @@ -0,0 +1 @@ +-DNI=1024 -DNJ=1024 -DNK=1024 -DNL=1024 diff --git a/polly/test/polybench/linear-algebra/kernels/3mm/3mm.c b/polly/test/polybench/linear-algebra/kernels/3mm/3mm.c new file mode 100755 index 00000000000..d7e5384f2c6 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/3mm/3mm.c @@ -0,0 +1,217 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef NI +# define NI 512 +#endif +#ifndef NJ +# define NJ 512 +#endif +#ifndef NK +# define NK 512 +#endif +#ifndef NL +# define NL 512 +#endif +#ifndef NM +# define NM 512 +#endif + + +/* Default data type is double (dgemm). */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. 
*/ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[NI][NK]; +DATA_TYPE B[NK][NJ]; +DATA_TYPE C[NJ][NM]; +DATA_TYPE D[NM][NL]; +DATA_TYPE E[NI][NJ]; +DATA_TYPE F[NJ][NL]; +DATA_TYPE G[NI][NL]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(NK * sizeof(DATA_TYPE*)); +DATA_TYPE** C = (DATA_TYPE**)malloc(NJ * sizeof(DATA_TYPE*)); +DATA_TYPE** D = (DATA_TYPE**)malloc(NM * sizeof(DATA_TYPE*)); +DATA_TYPE** E = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** F = (DATA_TYPE**)malloc(NJ * sizeof(DATA_TYPE*)); +DATA_TYPE** G = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < NI; ++i) + { + A[i] = (DATA_TYPE*)malloc(NK * sizeof(DATA_TYPE)); + E[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); + G[i] = (DATA_TYPE*)malloc(NL * sizeof(DATA_TYPE)); + } + for (i = 0; i < NK; ++i) + B[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); + for (i = 0; i < NJ; ++i) + { + C[i] = (DATA_TYPE*)malloc(NM * sizeof(DATA_TYPE)); + F[i] = (DATA_TYPE*)malloc(NL * sizeof(DATA_TYPE)); + } + for (i = 0; i < NM; ++i) + D[i] = (DATA_TYPE*)malloc(NL * sizeof(DATA_TYPE)); +} +#endif + + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i][j] = ((DATA_TYPE) i*j)/NI; + for (i = 0; i < NK; i++) + for (j = 0; j < NJ; j++) + B[i][j] = ((DATA_TYPE) i*j + 1)/NJ; + for (i = 0; i < NJ; i++) + for (j = 0; j < NM; j++) + C[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NM; i++) + for (j = 0; j < NL; j++) + D[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + E[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NJ; i++) + for (j = 0; j < NL; j++) + F[i][j] = ((DATA_TYPE) i*j + 2)/NJ; + for (i = 0; i < NI; i++) + for (j = 0; j < NL; j++) + G[i][j] = ((DATA_TYPE) i*j + 2)/NJ; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. 
*/ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NI; i++) { + for (j = 0; j < NL; j++) { + fprintf(stderr, "%0.2lf ", G[i][j]); + if ((i * NI + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long ni = NI; + long nj = NJ; + long nk = NK; + long nl = NL; + long nm = NM; +#else +void scop_func(long ni, long nj, long nk, long nl, long nm) { +#endif + long i, j, k; + +#pragma scop +#pragma live-out G + + /* /\* E := A*B *\/ */ + /* for (i = 0; i < ni; i++) */ + /* for (j = 0; j < nj; j++) */ + /* { */ + /* E[i][j] = 0; */ + /* for (k = 0; k < nk; ++k) */ + /* E[i][j] += A[i][k] * B[k][j]; */ + /* } */ + + /* /\* F := C*D *\/ */ + /* for (i = 0; i < nj; i++) */ + /* for (j = 0; j < nl; j++) */ + /* { */ + /* F[i][j] = 0; */ + /* for (k = 0; k < nm; ++k) */ + /* F[i][j] += C[i][k] * D[k][j]; */ + /* } */ + /* /\* G := E*F *\/ */ + /* for (i = 0; i < ni; i++) */ + /* for (j = 0; j < nl; j++) */ + /* { */ + /* G[i][j] = 0; */ + /* for (k = 0; k < nj; ++k) */ + /* G[i][j] += E[i][k] * F[k][j]; */ + /* } */ + + /// FIXME: Remove some parameters, CLooG-ISL crashes... + + /* E := A*B */ + for (i = 0; i < ni; i++) + for (j = 0; j < ni; j++) + { + E[i][j] = 0; + for (k = 0; k < nk; ++k) + E[i][j] += A[i][k] * B[k][j]; + } + + /* F := C*D */ + for (i = 0; i < ni; i++) + for (j = 0; j < ni; j++) + { + F[i][j] = 0; + for (k = 0; k < nk; ++k) + F[i][j] += C[i][k] * D[k][j]; + } + /* G := E*F */ + for (i = 0; i < ni; i++) + for (j = 0; j < ni; j++) + { + G[i][j] = 0; + for (k = 0; k < nk; ++k) + G[i][j] += E[i][k] * F[k][j]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int ni = NI; + int nj = NJ; + int nk = NK; + int nl = NL; + int nm = NM; + + /* Initialize array. */ + init_array(); + + /* Start timer. 
*/ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(ni, nj, nk, nl, nm); +#endif + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/3mm/3mm_with_param.ll b/polly/test/polybench/linear-algebra/kernels/3mm/3mm_with_param.ll new file mode 100644 index 00000000000..2c2eb52ad5e --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/3mm/3mm_with_param.ll @@ -0,0 +1,213 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s| FileCheck %s +; ModuleID = './linear-algebra/kernels/3mm/3mm_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@D = common global [512 x [512 x double]] zeroinitializer, align 32 +@E = common global [512 x [512 x double]] zeroinitializer, align 32 +@F = common global [512 x [512 x double]] zeroinitializer, align 32 +@G = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %ni, i64 %nj, i64 %nk, i64 %nl, i64 %nm) nounwind { +entry: + %0 = icmp sgt i64 %ni, 0 + br i1 %0, label %bb.nph76.bb.nph76.split_crit_edge, label %return + +bb.nph62: ; preds = 
%bb.nph76.bb.nph76.split_crit_edge, %bb6 + %indvar33 = phi i64 [ 0, %bb.nph76.bb.nph76.split_crit_edge ], [ %indvar.next34, %bb6 ] + br i1 %7, label %bb.nph54.us.preheader, label %bb4.preheader + +bb.nph54.us.preheader: ; preds = %bb.nph62 + br label %bb.nph54.us + +bb4.preheader: ; preds = %bb.nph62 + br label %bb4 + +bb4.us: ; preds = %bb2.us + %.lcssa36 = phi double [ %5, %bb2.us ] + store double %.lcssa36, double* %scevgep105 + %1 = add nsw i64 %storemerge758.us, 1 + %exitcond40 = icmp eq i64 %1, %ni + br i1 %exitcond40, label %bb6.loopexit3, label %bb.nph54.us + +bb2.us: ; preds = %bb.nph54.us, %bb2.us + %.tmp.056.us = phi double [ 0.000000e+00, %bb.nph54.us ], [ %5, %bb2.us ] + %storemerge853.us = phi i64 [ 0, %bb.nph54.us ], [ %6, %bb2.us ] + %scevgep102 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %indvar33, i64 %storemerge853.us + %scevgep101 = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge853.us, i64 %storemerge758.us + %2 = load double* %scevgep102, align 8 + %3 = load double* %scevgep101, align 8 + %4 = fmul double %2, %3 + %5 = fadd double %.tmp.056.us, %4 + %6 = add nsw i64 %storemerge853.us, 1 + %exitcond37 = icmp eq i64 %6, %nk + br i1 %exitcond37, label %bb4.us, label %bb2.us + +bb.nph54.us: ; preds = %bb.nph54.us.preheader, %bb4.us + %storemerge758.us = phi i64 [ %1, %bb4.us ], [ 0, %bb.nph54.us.preheader ] + %scevgep105 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %indvar33, i64 %storemerge758.us + store double 0.000000e+00, double* %scevgep105, align 8 + br label %bb2.us + +bb4: ; preds = %bb4.preheader, %bb4 + %indvar108 = phi i64 [ %indvar.next109, %bb4 ], [ 0, %bb4.preheader ] + %scevgep113 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %indvar33, i64 %indvar108 + store double 0.000000e+00, double* %scevgep113, align 8 + %indvar.next109 = add i64 %indvar108, 1 + %exitcond32 = icmp eq i64 %indvar.next109, %ni + br i1 %exitcond32, label %bb6.loopexit, label %bb4 + +bb6.loopexit: ; preds = %bb4 + br 
label %bb6 + +bb6.loopexit3: ; preds = %bb4.us + br label %bb6 + +bb6: ; preds = %bb6.loopexit3, %bb6.loopexit + %indvar.next34 = add i64 %indvar33, 1 + %exitcond43 = icmp ne i64 %indvar.next34, %ni + br i1 %exitcond43, label %bb.nph62, label %bb16.preheader + +bb.nph76.bb.nph76.split_crit_edge: ; preds = %entry + %7 = icmp sgt i64 %nk, 0 + br label %bb.nph62 + +bb16.preheader: ; preds = %bb6 + br i1 %0, label %bb.nph52.bb.nph52.split_crit_edge, label %return + +bb.nph38: ; preds = %bb.nph52.bb.nph52.split_crit_edge, %bb15 + %indvar18 = phi i64 [ 0, %bb.nph52.bb.nph52.split_crit_edge ], [ %indvar.next19, %bb15 ] + br i1 %14, label %bb.nph30.us.preheader, label %bb13.preheader + +bb.nph30.us.preheader: ; preds = %bb.nph38 + br label %bb.nph30.us + +bb13.preheader: ; preds = %bb.nph38 + br label %bb13 + +bb13.us: ; preds = %bb11.us + %.lcssa21 = phi double [ %12, %bb11.us ] + store double %.lcssa21, double* %scevgep90 + %8 = add nsw i64 %storemerge534.us, 1 + %exitcond25 = icmp eq i64 %8, %ni + br i1 %exitcond25, label %bb15.loopexit2, label %bb.nph30.us + +bb11.us: ; preds = %bb.nph30.us, %bb11.us + %.tmp.032.us = phi double [ 0.000000e+00, %bb.nph30.us ], [ %12, %bb11.us ] + %storemerge629.us = phi i64 [ 0, %bb.nph30.us ], [ %13, %bb11.us ] + %scevgep87 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar18, i64 %storemerge629.us + %scevgep86 = getelementptr [512 x [512 x double]]* @D, i64 0, i64 %storemerge629.us, i64 %storemerge534.us + %9 = load double* %scevgep87, align 8 + %10 = load double* %scevgep86, align 8 + %11 = fmul double %9, %10 + %12 = fadd double %.tmp.032.us, %11 + %13 = add nsw i64 %storemerge629.us, 1 + %exitcond22 = icmp eq i64 %13, %nk + br i1 %exitcond22, label %bb13.us, label %bb11.us + +bb.nph30.us: ; preds = %bb.nph30.us.preheader, %bb13.us + %storemerge534.us = phi i64 [ %8, %bb13.us ], [ 0, %bb.nph30.us.preheader ] + %scevgep90 = getelementptr [512 x [512 x double]]* @F, i64 0, i64 %indvar18, i64 %storemerge534.us + store 
double 0.000000e+00, double* %scevgep90, align 8 + br label %bb11.us + +bb13: ; preds = %bb13.preheader, %bb13 + %indvar93 = phi i64 [ %indvar.next94, %bb13 ], [ 0, %bb13.preheader ] + %scevgep98 = getelementptr [512 x [512 x double]]* @F, i64 0, i64 %indvar18, i64 %indvar93 + store double 0.000000e+00, double* %scevgep98, align 8 + %indvar.next94 = add i64 %indvar93, 1 + %exitcond17 = icmp eq i64 %indvar.next94, %ni + br i1 %exitcond17, label %bb15.loopexit, label %bb13 + +bb15.loopexit: ; preds = %bb13 + br label %bb15 + +bb15.loopexit2: ; preds = %bb13.us + br label %bb15 + +bb15: ; preds = %bb15.loopexit2, %bb15.loopexit + %indvar.next19 = add i64 %indvar18, 1 + %exitcond28 = icmp ne i64 %indvar.next19, %ni + br i1 %exitcond28, label %bb.nph38, label %bb25.preheader + +bb.nph52.bb.nph52.split_crit_edge: ; preds = %bb16.preheader + %14 = icmp sgt i64 %nk, 0 + br label %bb.nph38 + +bb25.preheader: ; preds = %bb15 + br i1 %0, label %bb.nph28.bb.nph28.split_crit_edge, label %return + +bb.nph14: ; preds = %bb.nph28.bb.nph28.split_crit_edge, %bb24 + %indvar5 = phi i64 [ 0, %bb.nph28.bb.nph28.split_crit_edge ], [ %indvar.next6, %bb24 ] + br i1 %21, label %bb.nph.us.preheader, label %bb22.preheader + +bb.nph.us.preheader: ; preds = %bb.nph14 + br label %bb.nph.us + +bb22.preheader: ; preds = %bb.nph14 + br label %bb22 + +bb22.us: ; preds = %bb20.us + %.lcssa = phi double [ %19, %bb20.us ] + store double %.lcssa, double* %scevgep80 + %15 = add nsw i64 %storemerge310.us, 1 + %exitcond = icmp eq i64 %15, %ni + br i1 %exitcond, label %bb24.loopexit1, label %bb.nph.us + +bb20.us: ; preds = %bb.nph.us, %bb20.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %19, %bb20.us ] + %storemerge49.us = phi i64 [ 0, %bb.nph.us ], [ %20, %bb20.us ] + %scevgep77 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %indvar5, i64 %storemerge49.us + %scevgep = getelementptr [512 x [512 x double]]* @F, i64 0, i64 %storemerge49.us, i64 %storemerge310.us + %16 = load double* 
%scevgep77, align 8 + %17 = load double* %scevgep, align 8 + %18 = fmul double %16, %17 + %19 = fadd double %.tmp.0.us, %18 + %20 = add nsw i64 %storemerge49.us, 1 + %exitcond8 = icmp eq i64 %20, %nk + br i1 %exitcond8, label %bb22.us, label %bb20.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb22.us + %storemerge310.us = phi i64 [ %15, %bb22.us ], [ 0, %bb.nph.us.preheader ] + %scevgep80 = getelementptr [512 x [512 x double]]* @G, i64 0, i64 %indvar5, i64 %storemerge310.us + store double 0.000000e+00, double* %scevgep80, align 8 + br label %bb20.us + +bb22: ; preds = %bb22.preheader, %bb22 + %indvar = phi i64 [ %indvar.next, %bb22 ], [ 0, %bb22.preheader ] + %scevgep83 = getelementptr [512 x [512 x double]]* @G, i64 0, i64 %indvar5, i64 %indvar + store double 0.000000e+00, double* %scevgep83, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond4 = icmp eq i64 %indvar.next, %ni + br i1 %exitcond4, label %bb24.loopexit, label %bb22 + +bb24.loopexit: ; preds = %bb22 + br label %bb24 + +bb24.loopexit1: ; preds = %bb22.us + br label %bb24 + +bb24: ; preds = %bb24.loopexit1, %bb24.loopexit + %indvar.next6 = add i64 %indvar5, 1 + %exitcond13 = icmp ne i64 %indvar.next6, %ni + br i1 %exitcond13, label %bb.nph14, label %return.loopexit + +bb.nph28.bb.nph28.split_crit_edge: ; preds = %bb25.preheader + %21 = icmp sgt i64 %nk, 0 + br label %bb.nph14 + +return.loopexit: ; preds = %bb24 + br label %return + +return: ; preds = %return.loopexit, %bb25.preheader, %bb16.preheader, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/3mm/3mm_without_param.ll b/polly/test/polybench/linear-algebra/kernels/3mm/3mm_without_param.ll new file mode 100644 index 00000000000..25902dd4ac9 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/3mm/3mm_without_param.ll @@ -0,0 +1,137 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = 
'./linear-algebra/kernels/3mm/3mm_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@D = common global [512 x [512 x double]] zeroinitializer, align 32 +@E = common global [512 x [512 x double]] zeroinitializer, align 32 +@F = common global [512 x [512 x double]] zeroinitializer, align 32 +@G = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph76.bb.nph76.split_crit_edge: + br label %bb5.preheader + +bb4.us: ; preds = %bb2.us + %.lcssa19 = phi double [ %4, %bb2.us ] + store double %.lcssa19, double* %scevgep94 + %0 = add nsw i64 %storemerge758.us, 1 + %exitcond23 = icmp eq i64 %0, 512 + br i1 %exitcond23, label %bb6, label %bb.nph54.us + +bb2.us: ; preds = %bb.nph54.us, %bb2.us + %.tmp.056.us = phi double [ 0.000000e+00, %bb.nph54.us ], [ %4, %bb2.us ] + %storemerge853.us = phi i64 [ 0, %bb.nph54.us ], [ %5, %bb2.us ] + %scevgep91 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %storemerge63, i64 %storemerge853.us + %scevgep90 = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge853.us, i64 %storemerge758.us + %1 = load double* %scevgep91, align 8 + %2 = load double* %scevgep90, align 8 + %3 = fmul double %1, %2 + %4 = fadd 
double %.tmp.056.us, %3 + %5 = add nsw i64 %storemerge853.us, 1 + %exitcond20 = icmp eq i64 %5, 512 + br i1 %exitcond20, label %bb4.us, label %bb2.us + +bb.nph54.us: ; preds = %bb5.preheader, %bb4.us + %storemerge758.us = phi i64 [ %0, %bb4.us ], [ 0, %bb5.preheader ] + %scevgep94 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %storemerge63, i64 %storemerge758.us + store double 0.000000e+00, double* %scevgep94, align 8 + br label %bb2.us + +bb6: ; preds = %bb4.us + %6 = add nsw i64 %storemerge63, 1 + %exitcond26 = icmp ne i64 %6, 512 + br i1 %exitcond26, label %bb5.preheader, label %bb14.preheader.preheader + +bb14.preheader.preheader: ; preds = %bb6 + br label %bb14.preheader + +bb5.preheader: ; preds = %bb6, %bb.nph76.bb.nph76.split_crit_edge + %storemerge63 = phi i64 [ 0, %bb.nph76.bb.nph76.split_crit_edge ], [ %6, %bb6 ] + br label %bb.nph54.us + +bb13.us: ; preds = %bb11.us + %.lcssa9 = phi double [ %11, %bb11.us ] + store double %.lcssa9, double* %scevgep87 + %7 = add nsw i64 %storemerge534.us, 1 + %exitcond13 = icmp eq i64 %7, 512 + br i1 %exitcond13, label %bb15, label %bb.nph30.us + +bb11.us: ; preds = %bb.nph30.us, %bb11.us + %.tmp.032.us = phi double [ 0.000000e+00, %bb.nph30.us ], [ %11, %bb11.us ] + %storemerge629.us = phi i64 [ 0, %bb.nph30.us ], [ %12, %bb11.us ] + %scevgep84 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %storemerge139, i64 %storemerge629.us + %scevgep83 = getelementptr [512 x [512 x double]]* @D, i64 0, i64 %storemerge629.us, i64 %storemerge534.us + %8 = load double* %scevgep84, align 8 + %9 = load double* %scevgep83, align 8 + %10 = fmul double %8, %9 + %11 = fadd double %.tmp.032.us, %10 + %12 = add nsw i64 %storemerge629.us, 1 + %exitcond10 = icmp eq i64 %12, 512 + br i1 %exitcond10, label %bb13.us, label %bb11.us + +bb.nph30.us: ; preds = %bb14.preheader, %bb13.us + %storemerge534.us = phi i64 [ %7, %bb13.us ], [ 0, %bb14.preheader ] + %scevgep87 = getelementptr [512 x [512 x double]]* @F, i64 0, i64 
%storemerge139, i64 %storemerge534.us + store double 0.000000e+00, double* %scevgep87, align 8 + br label %bb11.us + +bb15: ; preds = %bb13.us + %13 = add nsw i64 %storemerge139, 1 + %exitcond16 = icmp ne i64 %13, 512 + br i1 %exitcond16, label %bb14.preheader, label %bb23.preheader.preheader + +bb23.preheader.preheader: ; preds = %bb15 + br label %bb23.preheader + +bb14.preheader: ; preds = %bb14.preheader.preheader, %bb15 + %storemerge139 = phi i64 [ %13, %bb15 ], [ 0, %bb14.preheader.preheader ] + br label %bb.nph30.us + +bb22.us: ; preds = %bb20.us + %.lcssa = phi double [ %18, %bb20.us ] + store double %.lcssa, double* %scevgep80 + %14 = add nsw i64 %storemerge310.us, 1 + %exitcond = icmp eq i64 %14, 512 + br i1 %exitcond, label %bb24, label %bb.nph.us + +bb20.us: ; preds = %bb.nph.us, %bb20.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %18, %bb20.us ] + %storemerge49.us = phi i64 [ 0, %bb.nph.us ], [ %19, %bb20.us ] + %scevgep77 = getelementptr [512 x [512 x double]]* @E, i64 0, i64 %storemerge215, i64 %storemerge49.us + %scevgep = getelementptr [512 x [512 x double]]* @F, i64 0, i64 %storemerge49.us, i64 %storemerge310.us + %15 = load double* %scevgep77, align 8 + %16 = load double* %scevgep, align 8 + %17 = fmul double %15, %16 + %18 = fadd double %.tmp.0.us, %17 + %19 = add nsw i64 %storemerge49.us, 1 + %exitcond1 = icmp eq i64 %19, 512 + br i1 %exitcond1, label %bb22.us, label %bb20.us + +bb.nph.us: ; preds = %bb23.preheader, %bb22.us + %storemerge310.us = phi i64 [ %14, %bb22.us ], [ 0, %bb23.preheader ] + %scevgep80 = getelementptr [512 x [512 x double]]* @G, i64 0, i64 %storemerge215, i64 %storemerge310.us + store double 0.000000e+00, double* %scevgep80, align 8 + br label %bb20.us + +bb24: ; preds = %bb22.us + %20 = add nsw i64 %storemerge215, 1 + %exitcond6 = icmp ne i64 %20, 512 + br i1 %exitcond6, label %bb23.preheader, label %return + +bb23.preheader: ; preds = %bb23.preheader.preheader, %bb24 + %storemerge215 = phi i64 [ %20, %bb24 
], [ 0, %bb23.preheader.preheader ] + br label %bb.nph.us + +return: ; preds = %bb24 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/linear-algebra/kernels/3mm/compiler.opts b/polly/test/polybench/linear-algebra/kernels/3mm/compiler.opts new file mode 100755 index 00000000000..9d37e0bfe5f --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/3mm/compiler.opts @@ -0,0 +1 @@ +-DNI=1024 -DNJ=1024 -DNK=1024 -DNL=1024 -DNM=1024 diff --git a/polly/test/polybench/linear-algebra/kernels/atax/atax.c b/polly/test/polybench/linear-algebra/kernels/atax/atax.c new file mode 100755 index 00000000000..3725dd7fa86 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/atax/atax.c @@ -0,0 +1,119 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + +/* Default problem size. */ +#ifndef NX +# define NX 8000 +#endif +#ifndef NY +# define NY 8000 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[NX][NY]; +DATA_TYPE x[NY]; +DATA_TYPE y[NY]; +DATA_TYPE tmp[NX]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(NX * sizeof(DATA_TYPE*)); +DATA_TYPE* x = (DATA_TYPE*)malloc(NY * sizeof(DATA_TYPE)); +DATA_TYPE* y = (DATA_TYPE*)malloc(NY * sizeof(DATA_TYPE)); +DATA_TYPE* tmp = (DATA_TYPE*)malloc(NX * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i < NX; ++i) + A[i] = (DATA_TYPE*)malloc(NY * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < NX; i++) + { + x[i] = i * M_PI; + for (j = 0; j < NY; j++) + A[i][j] = ((DATA_TYPE) i*j) / NX; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NX; i++) { + fprintf(stderr, "%0.2lf ", y[i]); + if (i%80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long nx = NX; + long ny = NY; +#else +void scop_func(long nx, long ny) { +#endif + long i, j; + +#pragma scop +#pragma live-out y + for (i= 0; i < nx; i++) + y[i] = 0; + for (i = 0; i < ny; i++) + { + tmp[i] = 0; + for (j = 0; j < ny; j++) + tmp[i] = tmp[i] + A[i][j] * x[j]; + for (j = 0; j < ny; j++) + y[j] = y[j] + A[i][j] * tmp[i]; + } +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j; + int nx = NX; + int ny = NY; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(nx, ny); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/atax/atax_with_param.ll b/polly/test/polybench/linear-algebra/kernels/atax/atax_with_param.ll new file mode 100644 index 00000000000..72c79005b1a --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/atax/atax_with_param.ll @@ -0,0 +1,91 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/atax/atax_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@x = common global [8000 x double] zeroinitializer, align 32 +@A = common 
global [8000 x [8000 x double]] zeroinitializer, align 32 +@y = common global [8000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@tmp = common global [8000 x double] zeroinitializer, align 32 + +define void @scop_func(i64 %nx, i64 %ny) nounwind { +entry: + %0 = icmp sgt i64 %nx, 0 + br i1 %0, label %bb.preheader, label %bb10.preheader + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb.preheader, %bb + %storemerge15 = phi i64 [ %1, %bb ], [ 0, %bb.preheader ] + %scevgep26 = getelementptr [8000 x double]* @y, i64 0, i64 %storemerge15 + store double 0.000000e+00, double* %scevgep26, align 8 + %1 = add nsw i64 %storemerge15, 1 + %exitcond10 = icmp eq i64 %1, %nx + br i1 %exitcond10, label %bb10.preheader.loopexit, label %bb + +bb10.preheader.loopexit: ; preds = %bb + br label %bb10.preheader + +bb10.preheader: ; preds = %bb10.preheader.loopexit, %entry + %2 = icmp sgt i64 %ny, 0 + br i1 %2, label %bb.nph.preheader, label %return + +bb.nph.preheader: ; preds = %bb10.preheader + br label %bb.nph + +bb.nph: ; preds = %bb.nph.preheader, %bb9 + %storemerge17 = phi i64 [ %13, %bb9 ], [ 0, %bb.nph.preheader ] + %scevgep24 = getelementptr [8000 x double]* @tmp, i64 0, i64 %storemerge17 + store double 0.000000e+00, double* %scevgep24, align 8 + br label %bb4 + +bb4: ; preds = %bb4, %bb.nph + %.tmp.0 = phi double [ 0.000000e+00, %bb.nph ], [ %6, %bb4 ] + %storemerge24 = phi i64 [ 0, %bb.nph ], [ %7, %bb4 ] + %scevgep17 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge17, i64 %storemerge24 + %scevgep = getelementptr [8000 x double]* @x, i64 0, i64 %storemerge24 + %3 = load double* %scevgep17, align 8 + %4 = load double* %scevgep, align 8 + %5 = fmul double %3, %4 + %6 = fadd double %.tmp.0, %5 + %7 = add nsw i64 %storemerge24, 1 + %exitcond1 = icmp eq i64 %7, %ny + br i1 %exitcond1, label %bb8.loopexit, label %bb4 + +bb7: ; preds = %bb8.loopexit, 
%bb7 + %storemerge35 = phi i64 [ %12, %bb7 ], [ 0, %bb8.loopexit ] + %scevgep19 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge17, i64 %storemerge35 + %scevgep20 = getelementptr [8000 x double]* @y, i64 0, i64 %storemerge35 + %8 = load double* %scevgep20, align 8 + %9 = load double* %scevgep19, align 8 + %10 = fmul double %9, %.lcssa + %11 = fadd double %8, %10 + store double %11, double* %scevgep20, align 8 + %12 = add nsw i64 %storemerge35, 1 + %exitcond = icmp eq i64 %12, %ny + br i1 %exitcond, label %bb9, label %bb7 + +bb8.loopexit: ; preds = %bb4 + %.lcssa = phi double [ %6, %bb4 ] + store double %.lcssa, double* %scevgep24 + br label %bb7 + +bb9: ; preds = %bb7 + %13 = add nsw i64 %storemerge17, 1 + %exitcond6 = icmp eq i64 %13, %ny + br i1 %exitcond6, label %return.loopexit, label %bb.nph + +return.loopexit: ; preds = %bb9 + br label %return + +return: ; preds = %return.loopexit, %bb10.preheader + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': +; CHECK-NEXT: scop_func(): diff --git a/polly/test/polybench/linear-algebra/kernels/atax/atax_without_param.ll b/polly/test/polybench/linear-algebra/kernels/atax/atax_without_param.ll new file mode 100644 index 00000000000..c493a9e8b6c --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/atax/atax_without_param.ll @@ -0,0 +1,77 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/atax/atax_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, 
%struct._IO_FILE*, i32 } + +@x = common global [8000 x double] zeroinitializer, align 32 +@A = common global [8000 x [8000 x double]] zeroinitializer, align 32 +@y = common global [8000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@tmp = common global [8000 x double] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph16: + br label %bb + +bb: ; preds = %bb, %bb.nph16 + %storemerge15 = phi i64 [ 0, %bb.nph16 ], [ %0, %bb ] + %scevgep26 = getelementptr [8000 x double]* @y, i64 0, i64 %storemerge15 + store double 0.000000e+00, double* %scevgep26, align 8 + %0 = add nsw i64 %storemerge15, 1 + %exitcond10 = icmp eq i64 %0, 8000 + br i1 %exitcond10, label %bb.nph.preheader, label %bb + +bb.nph.preheader: ; preds = %bb + br label %bb.nph + +bb.nph: ; preds = %bb.nph.preheader, %bb9 + %storemerge17 = phi i64 [ %11, %bb9 ], [ 0, %bb.nph.preheader ] + %scevgep24 = getelementptr [8000 x double]* @tmp, i64 0, i64 %storemerge17 + store double 0.000000e+00, double* %scevgep24, align 8 + br label %bb4 + +bb4: ; preds = %bb4, %bb.nph + %.tmp.0 = phi double [ 0.000000e+00, %bb.nph ], [ %4, %bb4 ] + %storemerge24 = phi i64 [ 0, %bb.nph ], [ %5, %bb4 ] + %scevgep17 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge17, i64 %storemerge24 + %scevgep = getelementptr [8000 x double]* @x, i64 0, i64 %storemerge24 + %1 = load double* %scevgep17, align 8 + %2 = load double* %scevgep, align 8 + %3 = fmul double %1, %2 + %4 = fadd double %.tmp.0, %3 + %5 = add nsw i64 %storemerge24, 1 + %exitcond1 = icmp eq i64 %5, 8000 + br i1 %exitcond1, label %bb8.loopexit, label %bb4 + +bb7: ; preds = %bb8.loopexit, %bb7 + %storemerge35 = phi i64 [ %10, %bb7 ], [ 0, %bb8.loopexit ] + %scevgep19 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge17, i64 %storemerge35 + %scevgep20 = getelementptr [8000 x double]* @y, i64 0, i64 %storemerge35 + %6 = load 
double* %scevgep20, align 8 + %7 = load double* %scevgep19, align 8 + %8 = fmul double %7, %.lcssa + %9 = fadd double %6, %8 + store double %9, double* %scevgep20, align 8 + %10 = add nsw i64 %storemerge35, 1 + %exitcond = icmp eq i64 %10, 8000 + br i1 %exitcond, label %bb9, label %bb7 + +bb8.loopexit: ; preds = %bb4 + %.lcssa = phi double [ %4, %bb4 ] + store double %.lcssa, double* %scevgep24 + br label %bb7 + +bb9: ; preds = %bb7 + %11 = add nsw i64 %storemerge17, 1 + %exitcond6 = icmp eq i64 %11, 8000 + br i1 %exitcond6, label %return, label %bb.nph + +return: ; preds = %bb9 + ret void +} +; CHECK: for region: 'bb => return' in function 'scop_func': +; CHECK-NEXT: scop_func(): diff --git a/polly/test/polybench/linear-algebra/kernels/bicg/bicg.c b/polly/test/polybench/linear-algebra/kernels/bicg/bicg.c new file mode 100755 index 00000000000..43d19d8f5cc --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/bicg/bicg.c @@ -0,0 +1,126 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + +/* Default problem size. */ +#ifndef NX +# define NX 8000 +#endif +#ifndef NY +# define NY 8000 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. 
*/ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[NX][NY]; +DATA_TYPE r[NX]; +DATA_TYPE s[NX]; +DATA_TYPE p[NX]; +DATA_TYPE q[NX]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(NX * sizeof(DATA_TYPE*)); +DATA_TYPE* r = (DATA_TYPE*)malloc(NX * sizeof(DATA_TYPE)); +DATA_TYPE* s = (DATA_TYPE*)malloc(NX * sizeof(DATA_TYPE)); +DATA_TYPE* p = (DATA_TYPE*)malloc(NX * sizeof(DATA_TYPE)); +DATA_TYPE* q = (DATA_TYPE*)malloc(NX * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i < NX; ++i) + A[i] = (DATA_TYPE*)malloc(NY * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < NX; i++) { + r[i] = i * M_PI; + p[i] = i * M_PI; + for (j = 0; j < NY; j++) { + A[i][j] = ((DATA_TYPE) i*j)/NX; + } + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NX; i++) { + fprintf(stderr, "%0.2lf ", s[i]); + fprintf(stderr, "%0.2lf ", q[i]); + if (i%80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long nx = NX; + long ny = NY; +#else +void scop_func(long nx, long ny) { +#endif + long i, j; + +#pragma scop +#pragma live-out s, q + + for (i = 0; i < ny; i++) + s[i] = 0; + + for (i = 0; i < nx; i++) + { + q[i] = 0; + for (j = 0; j < ny; j++) + { + s[j] = s[j] + r[i] * A[i][j]; + q[i] = q[i] + A[i][j] * p[j]; + } + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j; + int nx = NX; + int ny = NY; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(nx, ny); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/bicg/bicg_with_param.ll b/polly/test/polybench/linear-algebra/kernels/bicg/bicg_with_param.ll new file mode 100644 index 00000000000..3ccd5723bf6 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/bicg/bicg_with_param.ll @@ -0,0 +1,101 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/bicg/bicg_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@r = common global [8000 x double] zeroinitializer, align 32 +@p = common global [8000 x double] zeroinitializer, align 32 +@A = common global [8000 x [8000 x double]] zeroinitializer, align 32 +@s = common global [8000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@q = common global [8000 x double] zeroinitializer, align 32 + +define void @scop_func(i64 %nx, i64 %ny) nounwind { +entry: + %0 = icmp sgt i64 %ny, 0 + br i1 %0, label %bb.preheader, label %bb7.preheader + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb.preheader, %bb + %storemerge9 = phi i64 [ %1, %bb ], [ 0, %bb.preheader ] + %scevgep20 = getelementptr [8000 x double]* @s, i64 0, i64 %storemerge9 + store double 0.000000e+00, double* %scevgep20, align 8 + %1 = add nsw i64 %storemerge9, 1 + %exitcond11 = icmp eq i64 %1, %ny + br i1 %exitcond11, label 
%bb7.preheader.loopexit, label %bb + +bb7.preheader.loopexit: ; preds = %bb + br label %bb7.preheader + +bb7.preheader: ; preds = %bb7.preheader.loopexit, %entry + %2 = icmp sgt i64 %nx, 0 + br i1 %2, label %bb.nph8, label %return + +bb.nph8: ; preds = %bb7.preheader + br i1 %0, label %bb.nph.us.preheader, label %bb6.preheader + +bb.nph.us.preheader: ; preds = %bb.nph8 + br label %bb.nph.us + +bb6.preheader: ; preds = %bb.nph8 + br label %bb6 + +bb6.us: ; preds = %bb4.us + %.lcssa = phi double [ %10, %bb4.us ] + store double %.lcssa, double* %scevgep15 + %3 = add nsw i64 %storemerge14.us, 1 + %exitcond = icmp eq i64 %3, %nx + br i1 %exitcond, label %return.loopexit1, label %bb.nph.us + +bb4.us: ; preds = %bb.nph.us, %bb4.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %10, %bb4.us ] + %storemerge23.us = phi i64 [ 0, %bb.nph.us ], [ %11, %bb4.us ] + %scevgep11 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge14.us, i64 %storemerge23.us + %scevgep12 = getelementptr [8000 x double]* @s, i64 0, i64 %storemerge23.us + %scevgep = getelementptr [8000 x double]* @p, i64 0, i64 %storemerge23.us + %4 = load double* %scevgep12, align 8 + %5 = load double* %scevgep11, align 8 + %6 = fmul double %12, %5 + %7 = fadd double %4, %6 + store double %7, double* %scevgep12, align 8 + %8 = load double* %scevgep, align 8 + %9 = fmul double %5, %8 + %10 = fadd double %.tmp.0.us, %9 + %11 = add nsw i64 %storemerge23.us, 1 + %exitcond4 = icmp eq i64 %11, %ny + br i1 %exitcond4, label %bb6.us, label %bb4.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb6.us + %storemerge14.us = phi i64 [ %3, %bb6.us ], [ 0, %bb.nph.us.preheader ] + %scevgep16 = getelementptr [8000 x double]* @r, i64 0, i64 %storemerge14.us + %scevgep15 = getelementptr [8000 x double]* @q, i64 0, i64 %storemerge14.us + store double 0.000000e+00, double* %scevgep15, align 8 + %12 = load double* %scevgep16, align 8 + br label %bb4.us + +bb6: ; preds = %bb6.preheader, %bb6 + %indvar = phi i64 
[ %indvar.next, %bb6 ], [ 0, %bb6.preheader ] + %scevgep18 = getelementptr [8000 x double]* @q, i64 0, i64 %indvar + store double 0.000000e+00, double* %scevgep18, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond2 = icmp eq i64 %indvar.next, %nx + br i1 %exitcond2, label %return.loopexit, label %bb6 + +return.loopexit: ; preds = %bb6 + br label %return + +return.loopexit1: ; preds = %bb6.us + br label %return + +return: ; preds = %return.loopexit1, %return.loopexit, %bb7.preheader + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': +; CHECK-NEXT: scop_func(): + diff --git a/polly/test/polybench/linear-algebra/kernels/bicg/bicg_without_param.ll b/polly/test/polybench/linear-algebra/kernels/bicg/bicg_without_param.ll new file mode 100644 index 00000000000..d414d1fdbca --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/bicg/bicg_without_param.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/bicg/bicg_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@r = common global [8000 x double] zeroinitializer, align 32 +@p = common global [8000 x double] zeroinitializer, align 32 +@A = common global [8000 x [8000 x double]] zeroinitializer, align 32 +@s = common global [8000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@q = common global [8000 x double] zeroinitializer, align 32 + 
+define void @scop_func() nounwind { +bb.nph10: + br label %bb + +bb: ; preds = %bb, %bb.nph10 + %storemerge9 = phi i64 [ 0, %bb.nph10 ], [ %0, %bb ] + %scevgep18 = getelementptr [8000 x double]* @s, i64 0, i64 %storemerge9 + store double 0.000000e+00, double* %scevgep18, align 8 + %0 = add nsw i64 %storemerge9, 1 + %exitcond8 = icmp eq i64 %0, 8000 + br i1 %exitcond8, label %bb.nph.us.preheader, label %bb + +bb.nph.us.preheader: ; preds = %bb + br label %bb.nph.us + +bb6.us: ; preds = %bb4.us + %.lcssa = phi double [ %8, %bb4.us ] + store double %.lcssa, double* %scevgep15 + %1 = add nsw i64 %storemerge14.us, 1 + %exitcond = icmp eq i64 %1, 8000 + br i1 %exitcond, label %return, label %bb.nph.us + +bb4.us: ; preds = %bb.nph.us, %bb4.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %8, %bb4.us ] + %storemerge23.us = phi i64 [ 0, %bb.nph.us ], [ %9, %bb4.us ] + %scevgep11 = getelementptr [8000 x [8000 x double]]* @A, i64 0, i64 %storemerge14.us, i64 %storemerge23.us + %scevgep12 = getelementptr [8000 x double]* @s, i64 0, i64 %storemerge23.us + %scevgep = getelementptr [8000 x double]* @p, i64 0, i64 %storemerge23.us + %2 = load double* %scevgep12, align 8 + %3 = load double* %scevgep11, align 8 + %4 = fmul double %10, %3 + %5 = fadd double %2, %4 + store double %5, double* %scevgep12, align 8 + %6 = load double* %scevgep, align 8 + %7 = fmul double %3, %6 + %8 = fadd double %.tmp.0.us, %7 + %9 = add nsw i64 %storemerge23.us, 1 + %exitcond1 = icmp eq i64 %9, 8000 + br i1 %exitcond1, label %bb6.us, label %bb4.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb6.us + %storemerge14.us = phi i64 [ %1, %bb6.us ], [ 0, %bb.nph.us.preheader ] + %scevgep16 = getelementptr [8000 x double]* @r, i64 0, i64 %storemerge14.us + %scevgep15 = getelementptr [8000 x double]* @q, i64 0, i64 %storemerge14.us + store double 0.000000e+00, double* %scevgep15, align 8 + %10 = load double* %scevgep16, align 8 + br label %bb4.us + +return: ; preds = %bb6.us + ret void +} +; 
CHECK: for region: 'bb => return' in function 'scop_func': +; CHECK-NEXT: scop_func(): diff --git a/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen.c b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen.c new file mode 100755 index 00000000000..a8ae1387c32 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen.c @@ -0,0 +1,141 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef NR +# define NR 128 +#endif +#ifndef NQ +# define NQ 128 +#endif +#ifndef NP +# define NP 128 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[NR][NQ][NP]; +DATA_TYPE sum[NR][NQ][NP]; +DATA_TYPE C4[NP][NP]; +#else +DATA_TYPE*** A = (DATA_TYPE***)malloc(NR * sizeof(DATA_TYPE**)); +DATA_TYPE*** sum = (DATA_TYPE***)malloc(NR * sizeof(DATA_TYPE**)); +DATA_TYPE** C4 = (DATA_TYPE**)malloc(NP * sizeof(DATA_TYPE*)); +{ + int i, j; + for (i = 0; i < NR; ++i) + { + A[i] = (DATA_TYPE**)malloc(NQ * sizeof(DATA_TYPE*)); + sum[i] = (DATA_TYPE**)malloc(NQ * sizeof(DATA_TYPE*)); + for (i = 0; i < NP; ++i) + { + A[i][j] = (DATA_TYPE*)malloc(NP * sizeof(DATA_TYPE)); + sum[i][j] = (DATA_TYPE*)malloc(NP * sizeof(DATA_TYPE)); + } + } + for (i = 0; i < NP; ++i) + C4[i] = (DATA_TYPE*)malloc(NP * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j, k; + + for (i = 0; i < NR; i++) + for (j = 0; j < NQ; j++) + for (k = 0; k < NP; k++) + A[i][j][k] = ((DATA_TYPE) i*j + k) / NP; + for (i = 0; i < NP; i++) + for (j = 0; j < NP; j++) + C4[i][j] = ((DATA_TYPE) i*j) / NP; +} + + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j, k; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NR; i++) + for (j = 0; j < NQ; j++) + for (k = 0; k < NP; k++) { + fprintf(stderr, "%0.2lf ", A[i][j][k]); + if ((i * NR + j * NQ + k)% 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long nr = NR; + long nq = NQ; + long np = NP; +#else +void scop_func(long nr, long nq, long np) { +#endif + + long r, q, p, s; +#pragma scop +#pragma live-out A + + for (r = 0; r < nr; r++) + for (q = 0; q < nq; q++) { + for (p = 0; p < np; p++) { + sum[r][q][p] = 0; + for (s = 0; s < np; s++) + sum[r][q][p] = sum[r][q][p] + A[r][q][s] * C4[s][p]; + } + for (p = 0; p < np; p++) + A[r][q][p] = sum[r][q][p]; + } + + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int r, q, p, s; + int nr = NR; + int nq = NQ; + int np = NP; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(nr, nq, np); +#endif + + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_with_param.ll b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_with_param.ll new file mode 100644 index 00000000000..d7e6c066731 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_with_param.ll @@ -0,0 +1,104 @@ +; RUN: opt %loadPolly -correlated-propagation %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/doitgen/doitgen_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [128 x [128 x [128 x double]]] zeroinitializer, align 32 +@C4 = common global [128 x [128 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@sum = common global [128 x [128 x [128 x double]]] zeroinitializer, align 32 + +define void @scop_func(i64 %nr, i64 %nq, i64 %np) nounwind { +entry: + %0 = icmp sgt i64 %nr, 0 + br i1 %0, label %bb.nph50, label %return + +bb5.us: ; preds = %bb3.us + %.lcssa = phi double [ %5, %bb3.us ] + store double %.lcssa, double* %scevgep54 + %1 = add nsw i64 %storemerge26.us, 1 + %exitcond = icmp eq i64 %1, %np + br i1 %exitcond, label %bb9.loopexit, label %bb.nph.us + +bb3.us: ; preds = %bb.nph.us, %bb3.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %5, %bb3.us ] + %storemerge45.us = phi i64 [ 0, %bb.nph.us ], [ %6, %bb3.us ] + 
%scevgep = getelementptr [128 x [128 x double]]* @C4, i64 0, i64 %storemerge45.us, i64 %storemerge26.us + %scevgep51 = getelementptr [128 x [128 x [128 x double]]]* @A, i64 0, i64 %storemerge43, i64 %storemerge113, i64 %storemerge45.us + %2 = load double* %scevgep51, align 8 + %3 = load double* %scevgep, align 8 + %4 = fmul double %2, %3 + %5 = fadd double %.tmp.0.us, %4 + %6 = add nsw i64 %storemerge45.us, 1 + %exitcond1 = icmp eq i64 %6, %np + br i1 %exitcond1, label %bb5.us, label %bb3.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb5.us + %storemerge26.us = phi i64 [ %1, %bb5.us ], [ 0, %bb.nph.us.preheader ] + %scevgep54 = getelementptr [128 x [128 x [128 x double]]]* @sum, i64 0, i64 %storemerge43, i64 %storemerge113, i64 %storemerge26.us + store double 0.000000e+00, double* %scevgep54, align 8 + br label %bb3.us + +bb8: ; preds = %bb8.preheader, %bb8 + %storemerge311 = phi i64 [ %8, %bb8 ], [ 0, %bb8.preheader ] + %scevgep62 = getelementptr [128 x [128 x [128 x double]]]* @sum, i64 0, i64 %storemerge43, i64 %storemerge113, i64 %storemerge311 + %scevgep61 = getelementptr [128 x [128 x [128 x double]]]* @A, i64 0, i64 %storemerge43, i64 %storemerge113, i64 %storemerge311 + %7 = load double* %scevgep62, align 8 + store double %7, double* %scevgep61, align 8 + %8 = add nsw i64 %storemerge311, 1 + %exitcond6 = icmp eq i64 %8, %np + br i1 %exitcond6, label %bb10.loopexit, label %bb8 + +bb9.loopexit: ; preds = %bb5.us + br i1 %14, label %bb8.preheader, label %bb10 + +bb8.preheader: ; preds = %bb9.loopexit + br label %bb8 + +bb10.loopexit: ; preds = %bb8 + br label %bb10 + +bb10: ; preds = %bb10.loopexit, %bb6.preheader, %bb9.loopexit + %storemerge12566 = phi i64 [ %storemerge113, %bb9.loopexit ], [ %storemerge113, %bb6.preheader ], [ %storemerge113, %bb10.loopexit ] + %storemerge4464 = phi i64 [ %storemerge43, %bb9.loopexit ], [ %storemerge43, %bb6.preheader ], [ %storemerge43, %bb10.loopexit ] + %9 = add nsw i64 %storemerge12566, 1 + %10 = icmp slt i64 %9, %nq 
+ br i1 %10, label %bb6.preheader.backedge, label %bb12 + +bb6.preheader.backedge: ; preds = %bb10, %bb12 + %storemerge43.be = phi i64 [ %storemerge4464, %bb10 ], [ %11, %bb12 ] + %storemerge113.be = phi i64 [ %9, %bb10 ], [ 0, %bb12 ] + br label %bb6.preheader + +bb6.preheader: ; preds = %bb6.preheader.backedge, %bb6.preheader.preheader + %storemerge43 = phi i64 [ 0, %bb6.preheader.preheader ], [ %storemerge43.be, %bb6.preheader.backedge ] + %storemerge113 = phi i64 [ 0, %bb6.preheader.preheader ], [ %storemerge113.be, %bb6.preheader.backedge ] + br i1 %14, label %bb.nph.us.preheader, label %bb10 + +bb.nph.us.preheader: ; preds = %bb6.preheader + br label %bb.nph.us + +bb12: ; preds = %bb10 + %11 = add nsw i64 %storemerge4464, 1 + %12 = icmp slt i64 %11, %nr + br i1 %12, label %bb6.preheader.backedge, label %return.loopexit + +bb.nph50: ; preds = %entry + %13 = icmp sgt i64 %nq, 0 + %14 = icmp sgt i64 %np, 0 + br i1 %13, label %bb6.preheader.preheader, label %return + +bb6.preheader.preheader: ; preds = %bb.nph50 + br label %bb6.preheader + +return.loopexit: ; preds = %bb12 + br label %return + +return: ; preds = %return.loopexit, %bb.nph50, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_without_param.ll b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_without_param.ll new file mode 100644 index 00000000000..19247e93381 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/doitgen/doitgen_without_param.ll @@ -0,0 +1,79 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/doitgen/doitgen_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, 
i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [128 x [128 x [128 x double]]] zeroinitializer, align 32 +@C4 = common global [128 x [128 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@sum = common global [128 x [128 x [128 x double]]] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph50.bb.nph50.split_crit_edge: + br label %bb11.preheader + +bb5.us: ; preds = %bb3.us + %.lcssa = phi double [ %4, %bb3.us ] + store double %.lcssa, double* %scevgep54 + %0 = add nsw i64 %storemerge26.us, 1 + %exitcond = icmp eq i64 %0, 128 + br i1 %exitcond, label %bb8.loopexit, label %bb.nph.us + +bb3.us: ; preds = %bb.nph.us, %bb3.us + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %4, %bb3.us ] + %storemerge45.us = phi i64 [ 0, %bb.nph.us ], [ %5, %bb3.us ] + %scevgep51 = getelementptr [128 x [128 x [128 x double]]]* @A, i64 0, i64 %storemerge30, i64 %storemerge113, i64 %storemerge45.us + %scevgep = getelementptr [128 x [128 x double]]* @C4, i64 0, i64 %storemerge45.us, i64 %storemerge26.us + %1 = load double* %scevgep51, align 8 + %2 = load double* %scevgep, align 8 + %3 = fmul double %1, %2 + %4 = fadd double %.tmp.0.us, %3 + %5 = add nsw i64 %storemerge45.us, 1 + %exitcond1 = icmp eq i64 %5, 128 + br i1 %exitcond1, label %bb5.us, label %bb3.us + +bb.nph.us: ; preds = %bb6.preheader, %bb5.us + %storemerge26.us = phi i64 [ %0, %bb5.us ], [ 0, %bb6.preheader ] + %scevgep54 = getelementptr [128 x [128 x [128 x double]]]* @sum, i64 0, i64 %storemerge30, i64 %storemerge113, i64 %storemerge26.us + store double 0.000000e+00, double* %scevgep54, align 8 + br label %bb3.us + +bb8.loopexit: ; preds = %bb5.us + br label %bb8 + +bb8: ; preds = 
%bb8.loopexit, %bb8 + %storemerge311 = phi i64 [ %7, %bb8 ], [ 0, %bb8.loopexit ] + %scevgep57 = getelementptr [128 x [128 x [128 x double]]]* @sum, i64 0, i64 %storemerge30, i64 %storemerge113, i64 %storemerge311 + %scevgep56 = getelementptr [128 x [128 x [128 x double]]]* @A, i64 0, i64 %storemerge30, i64 %storemerge113, i64 %storemerge311 + %6 = load double* %scevgep57, align 8 + store double %6, double* %scevgep56, align 8 + %7 = add nsw i64 %storemerge311, 1 + %exitcond6 = icmp eq i64 %7, 128 + br i1 %exitcond6, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %8 = add nsw i64 %storemerge113, 1 + %exitcond9 = icmp ne i64 %8, 128 + br i1 %exitcond9, label %bb6.preheader, label %bb12 + +bb6.preheader: ; preds = %bb11.preheader, %bb10 + %storemerge113 = phi i64 [ %8, %bb10 ], [ 0, %bb11.preheader ] + br label %bb.nph.us + +bb12: ; preds = %bb10 + %9 = add nsw i64 %storemerge30, 1 + %exitcond14 = icmp ne i64 %9, 128 + br i1 %exitcond14, label %bb11.preheader, label %return + +bb11.preheader: ; preds = %bb12, %bb.nph50.bb.nph50.split_crit_edge + %storemerge30 = phi i64 [ 0, %bb.nph50.bb.nph50.split_crit_edge ], [ %9, %bb12 ] + br label %bb6.preheader + +return: ; preds = %bb12 + ret void +} +; CHECK: Valid Region for Scop: bb11.preheader => return diff --git a/polly/test/polybench/linear-algebra/kernels/gemm/compiler.opts b/polly/test/polybench/linear-algebra/kernels/gemm/compiler.opts new file mode 100755 index 00000000000..954e6c49012 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemm/compiler.opts @@ -0,0 +1 @@ +-DNI=1024 -DNJ=1024 -DNK=1024 diff --git a/polly/test/polybench/linear-algebra/kernels/gemm/gemm.c b/polly/test/polybench/linear-algebra/kernels/gemm/gemm.c new file mode 100755 index 00000000000..910c3bc42f1 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemm/gemm.c @@ -0,0 +1,139 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem 
size. */ +#ifndef NI +# define NI 512 +#endif +#ifndef NJ +# define NJ 512 +#endif +#ifndef NK +# define NK 512 +#endif + + +/* Default data type is double (dgemm). */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE alpha; +DATA_TYPE beta; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE C[NI][NJ]; +DATA_TYPE A[NI][NK]; +DATA_TYPE B[NK][NJ]; +#else +DATA_TYPE** C = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** A = (DATA_TYPE**)malloc(NI * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(NK * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < NI; ++i) + { + C[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); + A[i] = (DATA_TYPE*)malloc(NK * sizeof(DATA_TYPE)); + } + for (i = 0; i < NK; ++i) + B[i] = (DATA_TYPE*)malloc(NJ * sizeof(DATA_TYPE)); +} +#endif + + +inline +void init_array() +{ + int i, j; + + alpha = 32412; + beta = 2123; + for (i = 0; i < NI; i++) + for (j = 0; j < NK; j++) + A[i][j] = ((DATA_TYPE) i*j)/NI; + for (i = 0; i < NK; i++) + for (j = 0; j < NJ; j++) + B[i][j] = ((DATA_TYPE) i*j + 1)/NJ; + for (i = 0; i < NI; i++) + for (j = 0; j < NJ; j++) + C[i][j] = ((DATA_TYPE) i*j + 2)/NJ; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < NI; i++) { + for (j = 0; j < NJ; j++) { + fprintf(stderr, "%0.2lf ", C[i][j]); + if ((i * NI + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long ni = NI; + long nj = NJ; + long nk = NK; +#else +void scop_func(long ni, long nj, long nk) { +#endif + long i, j, k; + +#pragma scop +#pragma live-out C + + /* C := alpha*A*B + beta*C */ + for (i = 0; i < ni; i++) + for (j = 0; j < nj; j++) + { + C[i][j] *= beta; + for (k = 0; k < nk; ++k) + C[i][j] += alpha * A[i][k] * B[k][j]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int ni = NI; + int nj = NJ; + int nk = NK; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(ni, nj, nk); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/gemm/gemm_with_param.ll b/polly/test/polybench/linear-algebra/kernels/gemm/gemm_with_param.ll new file mode 100644 index 00000000000..11e7b5893c4 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemm/gemm_with_param.ll @@ -0,0 +1,98 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gemm/gemm_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, 
i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %ni, i64 %nj, i64 %nk) nounwind { +entry: + %0 = icmp sgt i64 %ni, 0 + br i1 %0, label %bb.nph26, label %return + +bb.nph8: ; preds = %bb.nph8.preheader, %bb6 + %indvar3 = phi i64 [ 0, %bb.nph8.preheader ], [ %indvar.next4, %bb6 ] + br i1 %14, label %bb.nph.us.preheader, label %bb4.preheader + +bb.nph.us.preheader: ; preds = %bb.nph8 + br label %bb.nph.us + +bb4.preheader: ; preds = %bb.nph8 + br label %bb4 + +bb4.us: ; preds = %bb2.us + %.lcssa = phi double [ %6, %bb2.us ] + store double %.lcssa, double* %scevgep30 + %1 = add nsw i64 %storemerge14.us, 1 + %exitcond = icmp eq i64 %1, %nj + br i1 %exitcond, label %bb6.loopexit1, label %bb.nph.us + +bb2.us: ; preds = %bb.nph.us, %bb2.us + %.tmp.0.us = phi double [ %9, %bb.nph.us ], [ %6, %bb2.us ] + %storemerge23.us = phi i64 [ 0, %bb.nph.us ], [ %7, %bb2.us ] + %scevgep27 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %indvar3, i64 %storemerge23.us + %scevgep = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge23.us, i64 %storemerge14.us + %2 = load double* %scevgep27, align 8 + %3 = fmul double %2, %15 + %4 = load double* %scevgep, align 8 + %5 = fmul double %3, %4 + %6 = fadd double %.tmp.0.us, %5 + %7 = add nsw i64 %storemerge23.us, 1 + %exitcond6 = icmp eq i64 %7, %nk + br i1 %exitcond6, label %bb4.us, label %bb2.us + +bb.nph.us: ; preds = %bb.nph.us.preheader, %bb4.us + %storemerge14.us = phi i64 [ %1, %bb4.us ], [ 0, %bb.nph.us.preheader ] + %scevgep30 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar3, i64 %storemerge14.us + %8 = load double* 
%scevgep30, align 8 + %9 = fmul double %8, %13 + store double %9, double* %scevgep30, align 8 + br label %bb2.us + +bb4: ; preds = %bb4.preheader, %bb4 + %indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb4.preheader ] + %scevgep35 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %indvar3, i64 %indvar + %10 = load double* %scevgep35, align 8 + %11 = fmul double %10, %13 + store double %11, double* %scevgep35, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond2 = icmp eq i64 %indvar.next, %nj + br i1 %exitcond2, label %bb6.loopexit, label %bb4 + +bb6.loopexit: ; preds = %bb4 + br label %bb6 + +bb6.loopexit1: ; preds = %bb4.us + br label %bb6 + +bb6: ; preds = %bb6.loopexit1, %bb6.loopexit + %indvar.next4 = add i64 %indvar3, 1 + %exitcond11 = icmp ne i64 %indvar.next4, %ni + br i1 %exitcond11, label %bb.nph8, label %return.loopexit + +bb.nph26: ; preds = %entry + %12 = icmp sgt i64 %nj, 0 + %13 = load double* @beta, align 8 + %14 = icmp sgt i64 %nk, 0 + %15 = load double* @alpha, align 8 + br i1 %12, label %bb.nph8.preheader, label %return + +bb.nph8.preheader: ; preds = %bb.nph26 + br label %bb.nph8 + +return.loopexit: ; preds = %bb6 + br label %return + +return: ; preds = %return.loopexit, %bb.nph26, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/gemm/gemm_without_param.ll b/polly/test/polybench/linear-algebra/kernels/gemm/gemm_without_param.ll new file mode 100644 index 00000000000..b7b125ec87b --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemm/gemm_without_param.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gemm/gemm_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + 
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@A = common global [512 x [512 x double]] zeroinitializer, align 32 +@B = common global [512 x [512 x double]] zeroinitializer, align 32 +@C = common global [512 x [512 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph26.bb.nph26.split_crit_edge: + %0 = load double* @beta, align 8 + %1 = load double* @alpha, align 8 + br label %bb5.preheader + +bb4.us: ; preds = %bb2.us + %.lcssa = phi double [ %7, %bb2.us ] + store double %.lcssa, double* %scevgep30 + %2 = add nsw i64 %storemerge14.us, 1 + %exitcond = icmp eq i64 %2, 512 + br i1 %exitcond, label %bb6, label %bb.nph.us + +bb2.us: ; preds = %bb.nph.us, %bb2.us + %.tmp.0.us = phi double [ %10, %bb.nph.us ], [ %7, %bb2.us ] + %storemerge23.us = phi i64 [ 0, %bb.nph.us ], [ %8, %bb2.us ] + %scevgep27 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %storemerge9, i64 %storemerge23.us + %scevgep = getelementptr [512 x [512 x double]]* @B, i64 0, i64 %storemerge23.us, i64 %storemerge14.us + %3 = load double* %scevgep27, align 8 + %4 = fmul double %3, %1 + %5 = load double* %scevgep, align 8 + %6 = fmul double %4, %5 + %7 = fadd double %.tmp.0.us, %6 + %8 = add nsw i64 %storemerge23.us, 1 + %exitcond1 = icmp eq i64 %8, 512 + br i1 %exitcond1, label %bb4.us, label %bb2.us + +bb.nph.us: ; preds = %bb5.preheader, %bb4.us + %storemerge14.us = phi i64 [ %2, %bb4.us ], [ 0, %bb5.preheader ] + %scevgep30 = getelementptr [512 x [512 x double]]* @C, i64 0, i64 %storemerge9, i64 %storemerge14.us + %9 = load double* 
%scevgep30, align 8 + %10 = fmul double %9, %0 + store double %10, double* %scevgep30, align 8 + br label %bb2.us + +bb6: ; preds = %bb4.us + %11 = add nsw i64 %storemerge9, 1 + %exitcond6 = icmp ne i64 %11, 512 + br i1 %exitcond6, label %bb5.preheader, label %return + +bb5.preheader: ; preds = %bb6, %bb.nph26.bb.nph26.split_crit_edge + %storemerge9 = phi i64 [ 0, %bb.nph26.bb.nph26.split_crit_edge ], [ %11, %bb6 ] + br label %bb.nph.us + +return: ; preds = %bb6 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/linear-algebra/kernels/gemver/gemver.c b/polly/test/polybench/linear-algebra/kernels/gemver/gemver.c new file mode 100755 index 00000000000..3e449a33778 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemver/gemver.c @@ -0,0 +1,146 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef N +# define N 4000 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. 
*/ +DATA_TYPE alpha; +DATA_TYPE beta; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[N][N]; +DATA_TYPE B[N][N]; +DATA_TYPE x[N]; +DATA_TYPE u1[N]; +DATA_TYPE u2[N]; +DATA_TYPE v2[N]; +DATA_TYPE v1[N]; +DATA_TYPE w[N]; +DATA_TYPE y[N]; +DATA_TYPE z[N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +DATA_TYPE* x = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* u1 = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* u2 = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* v1 = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* v2 = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* w = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* y = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* z = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i < N; ++i) + { + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + B[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + alpha = 43532; + beta = 12313; + for (i = 0; i < N; i++) + { + u1[i] = i; + u2[i] = (i+1)/N/2.0; + v1[i] = (i+1)/N/4.0; + v2[i] = (i+1)/N/6.0; + y[i] = (i+1)/N/8.0; + z[i] = (i+1)/N/9.0; + x[i] = 0.0; + w[i] = 0.0; + for (j = 0; j < N; j++) + A[i][j] = ((DATA_TYPE) i*j) / N; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) { + fprintf(stderr, "%0.2lf ", w[i]); + if (i%80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long i, j; + +#pragma scop +#pragma live-out w + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + A[i][j] = A[i][j] + u1[i] * v1[j] + u2[i] * v2[j]; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + x[i] = x[i] + beta * A[j][i] * y[j]; + + for (i = 0; i < n; i++) + x[i] = x[i] + z[i]; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + w[i] = w[i] + alpha * A[i][j] * x[j]; + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/gemver/gemver_with_param.ll b/polly/test/polybench/linear-algebra/kernels/gemver/gemver_with_param.ll new file mode 100644 index 00000000000..50bc1633daf --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemver/gemver_with_param.ll @@ -0,0 +1,154 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gemver/gemver_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { 
%struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@u1 = common global [4000 x double] zeroinitializer, align 32 +@u2 = common global [4000 x double] zeroinitializer, align 32 +@v1 = common global [4000 x double] zeroinitializer, align 32 +@v2 = common global [4000 x double] zeroinitializer, align 32 +@y = common global [4000 x double] zeroinitializer, align 32 +@z = common global [4000 x double] zeroinitializer, align 32 +@x = common global [4000 x double] zeroinitializer, align 32 +@w = common global [4000 x double] zeroinitializer, align 32 +@A = common global [4000 x [4000 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@B = common global [4000 x [4000 x double]] zeroinitializer, align 32 + +define void @scop_func(i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 0 + br i1 %0, label %bb.nph40.preheader, label %return + +bb.nph40.preheader: ; preds = %entry + br label %bb.nph40 + +bb.nph40: ; preds = %bb.nph40.preheader, %bb3 + %i.041 = phi i64 [ %11, %bb3 ], [ 0, %bb.nph40.preheader ] + %scevgep66 = getelementptr [4000 x double]* @u1, i64 0, i64 %i.041 + %scevgep67 = getelementptr [4000 x double]* @u2, i64 0, i64 %i.041 + %1 = load double* %scevgep66, align 8 + %2 = load double* %scevgep67, align 8 + br label %bb1 + +bb1: ; preds = %bb1, %bb.nph40 + %j.039 = phi i64 [ 0, %bb.nph40 ], [ %10, %bb1 ] + %scevgep63 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %i.041, i64 %j.039 + %scevgep62 = getelementptr [4000 x double]* @v2, i64 0, i64 %j.039 + %scevgep61 = getelementptr [4000 x double]* @v1, i64 0, i64 %j.039 + %3 = load double* %scevgep63, align 8 + %4 = load double* %scevgep61, align 8 + %5 = fmul double %1, %4 + %6 = fadd double %3, %5 + %7 = load double* %scevgep62, align 8 + %8 = fmul double %2, %7 + %9 = fadd double %6, %8 + store double %9, double* %scevgep63, align 8 
+ %10 = add nsw i64 %j.039, 1 + %exitcond16 = icmp eq i64 %10, %n + br i1 %exitcond16, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %11 = add nsw i64 %i.041, 1 + %exitcond20 = icmp eq i64 %11, %n + br i1 %exitcond20, label %bb10.preheader, label %bb.nph40 + +bb10.preheader: ; preds = %bb3 + br i1 %0, label %bb.nph38.bb.nph38.split_crit_edge, label %return + +bb.nph30: ; preds = %bb.nph38.bb.nph38.split_crit_edge, %bb9 + %i.134 = phi i64 [ 0, %bb.nph38.bb.nph38.split_crit_edge ], [ %18, %bb9 ] + %scevgep59 = getelementptr [4000 x double]* @x, i64 0, i64 %i.134 + %.promoted31 = load double* %scevgep59 + br label %bb7 + +bb7: ; preds = %bb7, %bb.nph30 + %.tmp.032 = phi double [ %.promoted31, %bb.nph30 ], [ %16, %bb7 ] + %j.129 = phi i64 [ 0, %bb.nph30 ], [ %17, %bb7 ] + %scevgep56 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %j.129, i64 %i.134 + %scevgep55 = getelementptr [4000 x double]* @y, i64 0, i64 %j.129 + %12 = load double* %scevgep56, align 8 + %13 = fmul double %12, %19 + %14 = load double* %scevgep55, align 8 + %15 = fmul double %13, %14 + %16 = fadd double %.tmp.032, %15 + %17 = add nsw i64 %j.129, 1 + %exitcond10 = icmp eq i64 %17, %n + br i1 %exitcond10, label %bb9, label %bb7 + +bb9: ; preds = %bb7 + %.lcssa9 = phi double [ %16, %bb7 ] + store double %.lcssa9, double* %scevgep59 + %18 = add nsw i64 %i.134, 1 + %exitcond13 = icmp eq i64 %18, %n + br i1 %exitcond13, label %bb13.preheader, label %bb.nph30 + +bb.nph38.bb.nph38.split_crit_edge: ; preds = %bb10.preheader + %19 = load double* @beta, align 8 + br label %bb.nph30 + +bb13.preheader: ; preds = %bb9 + br i1 %0, label %bb12.preheader, label %return + +bb12.preheader: ; preds = %bb13.preheader + br label %bb12 + +bb12: ; preds = %bb12.preheader, %bb12 + %i.227 = phi i64 [ %23, %bb12 ], [ 0, %bb12.preheader ] + %scevgep52 = getelementptr [4000 x double]* @z, i64 0, i64 %i.227 + %scevgep51 = getelementptr [4000 x double]* @x, i64 0, i64 %i.227 + %20 = load double* %scevgep51, align 8 + 
%21 = load double* %scevgep52, align 8 + %22 = fadd double %20, %21 + store double %22, double* %scevgep51, align 8 + %23 = add nsw i64 %i.227, 1 + %exitcond6 = icmp eq i64 %23, %n + br i1 %exitcond6, label %bb19.preheader, label %bb12 + +bb19.preheader: ; preds = %bb12 + br i1 %0, label %bb.nph26.bb.nph26.split_crit_edge, label %return + +bb.nph: ; preds = %bb.nph26.bb.nph26.split_crit_edge, %bb18 + %i.322 = phi i64 [ 0, %bb.nph26.bb.nph26.split_crit_edge ], [ %30, %bb18 ] + %scevgep49 = getelementptr [4000 x double]* @w, i64 0, i64 %i.322 + %.promoted = load double* %scevgep49 + br label %bb16 + +bb16: ; preds = %bb16, %bb.nph + %.tmp.0 = phi double [ %.promoted, %bb.nph ], [ %28, %bb16 ] + %j.221 = phi i64 [ 0, %bb.nph ], [ %29, %bb16 ] + %scevgep46 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %i.322, i64 %j.221 + %scevgep = getelementptr [4000 x double]* @x, i64 0, i64 %j.221 + %24 = load double* %scevgep46, align 8 + %25 = fmul double %24, %31 + %26 = load double* %scevgep, align 8 + %27 = fmul double %25, %26 + %28 = fadd double %.tmp.0, %27 + %29 = add nsw i64 %j.221, 1 + %exitcond1 = icmp eq i64 %29, %n + br i1 %exitcond1, label %bb18, label %bb16 + +bb18: ; preds = %bb16 + %.lcssa = phi double [ %28, %bb16 ] + store double %.lcssa, double* %scevgep49 + %30 = add nsw i64 %i.322, 1 + %exitcond = icmp eq i64 %30, %n + br i1 %exitcond, label %return.loopexit, label %bb.nph + +bb.nph26.bb.nph26.split_crit_edge: ; preds = %bb19.preheader + %31 = load double* @alpha, align 8 + br label %bb.nph + +return.loopexit: ; preds = %bb18 + br label %return + +return: ; preds = %return.loopexit, %bb19.preheader, %bb13.preheader, %bb10.preheader, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/gemver/gemver_without_param.ll b/polly/test/polybench/linear-algebra/kernels/gemver/gemver_without_param.ll new file mode 100644 index 00000000000..1802b51cb13 --- 
/dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gemver/gemver_without_param.ll @@ -0,0 +1,138 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gemver/gemver_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@u1 = common global [4000 x double] zeroinitializer, align 32 +@u2 = common global [4000 x double] zeroinitializer, align 32 +@v1 = common global [4000 x double] zeroinitializer, align 32 +@v2 = common global [4000 x double] zeroinitializer, align 32 +@y = common global [4000 x double] zeroinitializer, align 32 +@z = common global [4000 x double] zeroinitializer, align 32 +@x = common global [4000 x double] zeroinitializer, align 32 +@w = common global [4000 x double] zeroinitializer, align 32 +@A = common global [4000 x [4000 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@B = common global [4000 x [4000 x double]] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph31.bb.nph31.split_crit_edge: + br label %bb.nph26 + +bb.nph26: ; preds = %bb3, %bb.nph31.bb.nph31.split_crit_edge + %storemerge27 = phi i64 [ 0, %bb.nph31.bb.nph31.split_crit_edge ], [ %10, %bb3 ] + %scevgep52 = getelementptr [4000 x double]* @u1, i64 0, i64 %storemerge27 + %scevgep53 = getelementptr [4000 x double]* @u2, i64 0, i64 %storemerge27 + 
%0 = load double* %scevgep52, align 8 + %1 = load double* %scevgep53, align 8 + br label %bb1 + +bb1: ; preds = %bb1, %bb.nph26 + %storemerge625 = phi i64 [ 0, %bb.nph26 ], [ %9, %bb1 ] + %scevgep47 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %storemerge27, i64 %storemerge625 + %scevgep49 = getelementptr [4000 x double]* @v2, i64 0, i64 %storemerge625 + %scevgep48 = getelementptr [4000 x double]* @v1, i64 0, i64 %storemerge625 + %2 = load double* %scevgep47, align 8 + %3 = load double* %scevgep48, align 8 + %4 = fmul double %0, %3 + %5 = fadd double %2, %4 + %6 = load double* %scevgep49, align 8 + %7 = fmul double %1, %6 + %8 = fadd double %5, %7 + store double %8, double* %scevgep47, align 8 + %9 = add nsw i64 %storemerge625, 1 + %exitcond16 = icmp eq i64 %9, 4000 + br i1 %exitcond16, label %bb3, label %bb1 + +bb3: ; preds = %bb1 + %10 = add nsw i64 %storemerge27, 1 + %exitcond20 = icmp eq i64 %10, 4000 + br i1 %exitcond20, label %bb.nph24.bb.nph24.split_crit_edge, label %bb.nph26 + +bb.nph16: ; preds = %bb.nph24.bb.nph24.split_crit_edge, %bb9 + %storemerge120 = phi i64 [ 0, %bb.nph24.bb.nph24.split_crit_edge ], [ %17, %bb9 ] + %scevgep45 = getelementptr [4000 x double]* @x, i64 0, i64 %storemerge120 + %.promoted17 = load double* %scevgep45 + br label %bb7 + +bb7: ; preds = %bb7, %bb.nph16 + %.tmp.018 = phi double [ %.promoted17, %bb.nph16 ], [ %15, %bb7 ] + %storemerge515 = phi i64 [ 0, %bb.nph16 ], [ %16, %bb7 ] + %scevgep42 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %storemerge515, i64 %storemerge120 + %scevgep41 = getelementptr [4000 x double]* @y, i64 0, i64 %storemerge515 + %11 = load double* %scevgep42, align 8 + %12 = fmul double %11, %18 + %13 = load double* %scevgep41, align 8 + %14 = fmul double %12, %13 + %15 = fadd double %.tmp.018, %14 + %16 = add nsw i64 %storemerge515, 1 + %exitcond10 = icmp eq i64 %16, 4000 + br i1 %exitcond10, label %bb9, label %bb7 + +bb9: ; preds = %bb7 + %.lcssa9 = phi double [ %15, %bb7 ] + store 
double %.lcssa9, double* %scevgep45 + %17 = add nsw i64 %storemerge120, 1 + %exitcond13 = icmp eq i64 %17, 4000 + br i1 %exitcond13, label %bb12.preheader, label %bb.nph16 + +bb12.preheader: ; preds = %bb9 + br label %bb12 + +bb.nph24.bb.nph24.split_crit_edge: ; preds = %bb3 + %18 = load double* @beta, align 8 + br label %bb.nph16 + +bb12: ; preds = %bb12.preheader, %bb12 + %storemerge213 = phi i64 [ %22, %bb12 ], [ 0, %bb12.preheader ] + %scevgep38 = getelementptr [4000 x double]* @x, i64 0, i64 %storemerge213 + %scevgep37 = getelementptr [4000 x double]* @z, i64 0, i64 %storemerge213 + %19 = load double* %scevgep38, align 8 + %20 = load double* %scevgep37, align 8 + %21 = fadd double %19, %20 + store double %21, double* %scevgep38, align 8 + %22 = add nsw i64 %storemerge213, 1 + %exitcond6 = icmp eq i64 %22, 4000 + br i1 %exitcond6, label %bb.nph12.bb.nph12.split_crit_edge, label %bb12 + +bb.nph: ; preds = %bb.nph12.bb.nph12.split_crit_edge, %bb18 + %storemerge38 = phi i64 [ 0, %bb.nph12.bb.nph12.split_crit_edge ], [ %29, %bb18 ] + %scevgep35 = getelementptr [4000 x double]* @w, i64 0, i64 %storemerge38 + %.promoted = load double* %scevgep35 + br label %bb16 + +bb16: ; preds = %bb16, %bb.nph + %.tmp.0 = phi double [ %.promoted, %bb.nph ], [ %27, %bb16 ] + %storemerge47 = phi i64 [ 0, %bb.nph ], [ %28, %bb16 ] + %scevgep32 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %storemerge38, i64 %storemerge47 + %scevgep = getelementptr [4000 x double]* @x, i64 0, i64 %storemerge47 + %23 = load double* %scevgep32, align 8 + %24 = fmul double %23, %30 + %25 = load double* %scevgep, align 8 + %26 = fmul double %24, %25 + %27 = fadd double %.tmp.0, %26 + %28 = add nsw i64 %storemerge47, 1 + %exitcond1 = icmp eq i64 %28, 4000 + br i1 %exitcond1, label %bb18, label %bb16 + +bb18: ; preds = %bb16 + %.lcssa = phi double [ %27, %bb16 ] + store double %.lcssa, double* %scevgep35 + %29 = add nsw i64 %storemerge38, 1 + %exitcond = icmp eq i64 %29, 4000 + br i1 %exitcond, 
label %return, label %bb.nph + +bb.nph12.bb.nph12.split_crit_edge: ; preds = %bb12 + %30 = load double* @alpha, align 8 + br label %bb.nph + +return: ; preds = %bb18 + ret void +} +; CHECK: for region: 'bb.nph26 => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv.c b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv.c new file mode 100755 index 00000000000..dbaa60281a7 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv.c @@ -0,0 +1,127 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef N +# define N 4000 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE alpha; +DATA_TYPE beta; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[N][N]; +DATA_TYPE B[N][N]; +DATA_TYPE x[N]; +DATA_TYPE y[N]; +DATA_TYPE tmp[N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +DATA_TYPE* x = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* y = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +DATA_TYPE* tmp = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i < N; ++i) + { + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + B[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + alpha = 43532; + beta = 12313; + for (i = 0; i < N; i++) + { + x[i] = ((DATA_TYPE) i) / N; + for (j = 0; j < N; j++) + A[i][j] = ((DATA_TYPE) i*j) / N; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) { + fprintf(stderr, "%0.2lf ", y[i]); + if (i%80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long i, j; + +#pragma scop +#pragma live-out y + + for (i = 0; i < n; i++) + { + tmp[i] = 0; + y[i] = 0; + for (j = 0; j < n; j++) + { + tmp[i] = A[i][j] * x[j] + tmp[i]; + y[i] = B[i][j] * x[j] + y[i]; + } + y[i] = alpha * tmp[i] + beta * y[i]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_with_param.ll b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_with_param.ll new file mode 100644 index 00000000000..d143b3cc7ac --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_with_param.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly %defaultOpts -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gesummv/gesummv_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@x = common global [4000 
x double] zeroinitializer, align 32 +@A = common global [4000 x [4000 x double]] zeroinitializer, align 32 +@y = common global [4000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@tmp = common global [4000 x double] zeroinitializer, align 32 +@B = common global [4000 x [4000 x double]] zeroinitializer, align 32 + +define void @scop_func(i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 0 + br i1 %0, label %bb.nph10.split.us, label %return + +bb.nph10.split.us: ; preds = %entry + %1 = load double* @alpha, align 8 + %2 = load double* @beta, align 8 + br label %bb.nph.us + +bb3.us: ; preds = %bb1.us + %.lcssa1 = phi double [ %13, %bb1.us ] + %.lcssa = phi double [ %10, %bb1.us ] + store double %.lcssa, double* %scevgep17 + %3 = fmul double %.lcssa, %1 + %4 = fmul double %.lcssa1, %2 + %5 = fadd double %3, %4 + store double %5, double* %scevgep18, align 8 + %6 = add nsw i64 %storemerge6.us, 1 + %exitcond = icmp eq i64 %6, %n + br i1 %exitcond, label %return.loopexit, label %bb.nph.us + +bb1.us: ; preds = %bb.nph.us, %bb1.us + %.tmp3.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %13, %bb1.us ] + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %10, %bb1.us ] + %storemerge12.us = phi i64 [ 0, %bb.nph.us ], [ %14, %bb1.us ] + %scevgep13 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %storemerge6.us, i64 %storemerge12.us + %scevgep = getelementptr [4000 x [4000 x double]]* @B, i64 0, i64 %storemerge6.us, i64 %storemerge12.us + %scevgep12 = getelementptr [4000 x double]* @x, i64 0, i64 %storemerge12.us + %7 = load double* %scevgep13, align 8 + %8 = load double* %scevgep12, align 8 + %9 = fmul double %7, %8 + %10 = fadd double %9, %.tmp.0.us + %11 = load double* %scevgep, align 8 + %12 = fmul double %11, %8 + %13 = fadd double %12, %.tmp3.0.us + %14 = add nsw i64 %storemerge12.us, 1 + %exitcond2 = icmp eq i64 %14, %n + br i1 %exitcond2, label %bb3.us, label 
%bb1.us + +bb.nph.us: ; preds = %bb3.us, %bb.nph10.split.us + %storemerge6.us = phi i64 [ 0, %bb.nph10.split.us ], [ %6, %bb3.us ] + %scevgep18 = getelementptr [4000 x double]* @y, i64 0, i64 %storemerge6.us + %scevgep17 = getelementptr [4000 x double]* @tmp, i64 0, i64 %storemerge6.us + store double 0.000000e+00, double* %scevgep17, align 8 + store double 0.000000e+00, double* %scevgep18, align 8 + br label %bb1.us + +return.loopexit: ; preds = %bb3.us + br label %return + +return: ; preds = %return.loopexit, %entry + ret void +} +; CHECK: for region: 'entry.split => return' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_without_param.ll b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_without_param.ll new file mode 100644 index 00000000000..95430e23518 --- /dev/null +++ b/polly/test/polybench/linear-algebra/kernels/gesummv/gesummv_without_param.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/kernels/gesummv/gesummv_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@alpha = common global double 0.000000e+00 +@beta = common global double 0.000000e+00 +@x = common global [4000 x double] zeroinitializer, align 32 +@A = common global [4000 x [4000 x double]] zeroinitializer, align 32 +@y = common global [4000 x double] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@tmp = common 
global [4000 x double] zeroinitializer, align 32 +@B = common global [4000 x [4000 x double]] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph10.split.us: + %0 = load double* @alpha, align 8 + %1 = load double* @beta, align 8 + br label %bb.nph.us + +bb3.us: ; preds = %bb1.us + %.lcssa1 = phi double [ %12, %bb1.us ] + %.lcssa = phi double [ %9, %bb1.us ] + store double %.lcssa, double* %scevgep17 + %2 = fmul double %.lcssa, %0 + %3 = fmul double %.lcssa1, %1 + %4 = fadd double %2, %3 + store double %4, double* %scevgep18, align 8 + %5 = add nsw i64 %storemerge6.us, 1 + %exitcond = icmp eq i64 %5, 4000 + br i1 %exitcond, label %return, label %bb.nph.us + +bb1.us: ; preds = %bb.nph.us, %bb1.us + %.tmp3.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %12, %bb1.us ] + %.tmp.0.us = phi double [ 0.000000e+00, %bb.nph.us ], [ %9, %bb1.us ] + %storemerge12.us = phi i64 [ 0, %bb.nph.us ], [ %13, %bb1.us ] + %scevgep13 = getelementptr [4000 x [4000 x double]]* @A, i64 0, i64 %storemerge6.us, i64 %storemerge12.us + %scevgep = getelementptr [4000 x [4000 x double]]* @B, i64 0, i64 %storemerge6.us, i64 %storemerge12.us + %scevgep12 = getelementptr [4000 x double]* @x, i64 0, i64 %storemerge12.us + %6 = load double* %scevgep13, align 8 + %7 = load double* %scevgep12, align 8 + %8 = fmul double %6, %7 + %9 = fadd double %8, %.tmp.0.us + %10 = load double* %scevgep, align 8 + %11 = fmul double %10, %7 + %12 = fadd double %11, %.tmp3.0.us + %13 = add nsw i64 %storemerge12.us, 1 + %exitcond2 = icmp eq i64 %13, 4000 + br i1 %exitcond2, label %bb3.us, label %bb1.us + +bb.nph.us: ; preds = %bb3.us, %bb.nph10.split.us + %storemerge6.us = phi i64 [ 0, %bb.nph10.split.us ], [ %5, %bb3.us ] + %scevgep18 = getelementptr [4000 x double]* @y, i64 0, i64 %storemerge6.us + %scevgep17 = getelementptr [4000 x double]* @tmp, i64 0, i64 %storemerge6.us + store double 0.000000e+00, double* %scevgep17, align 8 + store double 0.000000e+00, double* %scevgep18, align 8 + br 
label %bb1.us + +return: ; preds = %bb3.us + ret void +} +; CHECK: Valid Region for Scop: bb.nph.us => return diff --git a/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt.c b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt.c new file mode 100755 index 00000000000..762e19efe05 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt.c @@ -0,0 +1,133 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef M +# define M 512 +#endif +#ifndef N +# define N 512 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE nrm; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[M][N]; +DATA_TYPE R[M][N]; +DATA_TYPE Q[M][N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(M * sizeof(DATA_TYPE*)); +DATA_TYPE** R = (DATA_TYPE**)malloc(M * sizeof(DATA_TYPE*)); +DATA_TYPE** Q = (DATA_TYPE**)malloc(M * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < M; ++i) + { + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + R[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + Q[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + A[i][j] = ((DATA_TYPE) i*j) / M; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, A[i][j]); + if ((i * M + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long m = M; + long n = N; +#else +void scop_func(long m, long n) { +#endif + long i, j, k; + +#pragma scop +#pragma live-out A + + for (k = 0; k < n; k++) + { + nrm = 0; + for (i = 0; i < m; i++) + nrm += A[i][k] * A[i][k]; + R[k][k] = sqrt(nrm); + for (i = 0; i < m; i++) + Q[i][k] = A[i][k] / R[k][k]; + for (j = k + 1; j < n; j++) + { + R[k][j] = 0; + for (i = 0; i < m; i++) + R[k][j] += Q[i][k] * A[i][j]; + for (i = 0; i < m; i++) + A[i][j] = A[i][j] - Q[i][k] * R[k][j]; + } + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int m = M; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(m, n); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_with_param.ll b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_with_param.ll new file mode 100644 index 00000000000..88315058fe3 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_with_param.ll @@ -0,0 +1,175 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-cloog -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './linear-algebra/solvers/gramschmidt/gramschmidt_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@nrm = common global double 0.000000e+00, align 8 +@A = common global [512 x [512 x double]] zeroinitializer, align 16 +@R = common global [512 x [512 x double]] zeroinitializer, align 16 +@Q = common global [512 x [512 x double]] zeroinitializer, align 16 + +define void @scop_func(i64 %m, i64 %n) nounwind { +bb: + %tmp3 = icmp sgt i64 %m, 0 + %smax4 = select i1 %tmp3, i64 %m, i64 0 + %tmp = icmp sgt i64 %m, 0 + %smax = select i1 %tmp, i64 %m, i64 0 + %tmp12 = icmp sgt i64 %m, 0 + %smax13 = select i1 %tmp12, i64 %m, i64 0 + %tmp25 = icmp sgt i64 %m, 0 + %smax26 = select i1 %tmp25, i64 %m, i64 0 + %tmp40 = add i64 %n, -1 + %tmp60 = icmp sgt i64 %n, 0 + %smax61 = select i1 %tmp60, i64 %n, i64 0 + br label %bb1 + +bb1: ; preds = %bb58, %bb + %tmp2 = phi i64 [ 0, %bb ], [ %tmp59, %bb58 ] + %tmp63 = mul i64 %tmp2, 513 + %tmp64 = add i64 %tmp63, 1 + %tmp67 = add i64 %tmp2, 1 + %tmp71 = mul i64 %tmp2, -1 + %tmp45 = add i64 %tmp40, %tmp71 + %scevgep50 = getelementptr [512 x [512 x double]]* @R, i64 0, i64 0, i64 %tmp63 + %exitcond62 = icmp ne i64 %tmp2, %smax61 + br i1 %exitcond62, label %bb3, label %bb60 + +bb3: ; preds = %bb1 + store 
double 0.000000e+00, double* @nrm, align 8 + br label %bb4 + +bb4: ; preds = %bb12, %bb3 + %i.0 = phi i64 [ 0, %bb3 ], [ %tmp14, %bb12 ] + %scevgep = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.0, i64 %tmp2 + %exitcond5 = icmp ne i64 %i.0, %smax4 + br i1 %exitcond5, label %bb5, label %bb15 + +bb5: ; preds = %bb4 + %tmp7 = load double* %scevgep + %tmp8 = load double* %scevgep + %tmp9 = fmul double %tmp7, %tmp8 + %tmp10 = load double* @nrm, align 8 + %tmp11 = fadd double %tmp10, %tmp9 + store double %tmp11, double* @nrm, align 8 + br label %bb12 + +bb12: ; preds = %bb5 + %tmp14 = add nsw i64 %i.0, 1 + br label %bb4 + +bb15: ; preds = %bb4 + %tmp16 = load double* @nrm, align 8 + %tmp17 = call double @sqrt(double %tmp16) + store double %tmp17, double* %scevgep50 + br label %bb18 + +bb18: ; preds = %bb25, %bb15 + %i.1 = phi i64 [ 0, %bb15 ], [ %tmp26, %bb25 ] + %scevgep5 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.1, i64 %tmp2 + %scevgep4 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.1, i64 %tmp2 + %exitcond = icmp ne i64 %i.1, %smax + br i1 %exitcond, label %bb19, label %bb27 + +bb19: ; preds = %bb18 + %tmp21 = load double* %scevgep5 + %tmp23 = load double* %scevgep50 + %tmp24 = fdiv double %tmp21, %tmp23 + store double %tmp24, double* %scevgep4 + br label %bb25 + +bb25: ; preds = %bb19 + %tmp26 = add nsw i64 %i.1, 1 + br label %bb18 + +bb27: ; preds = %bb18 + br label %bb28 + +bb28: ; preds = %bb56, %bb27 + %indvar = phi i64 [ %indvar.next, %bb56 ], [ 0, %bb27 ] + %tmp65 = add i64 %tmp64, %indvar + %scevgep31 = getelementptr [512 x [512 x double]]* @R, i64 0, i64 0, i64 %tmp65 + %tmp68 = add i64 %tmp67, %indvar + %exitcond46 = icmp ne i64 %indvar, %tmp45 + br i1 %exitcond46, label %bb29, label %bb57 + +bb29: ; preds = %bb28 + store double 0.000000e+00, double* %scevgep31 + br label %bb30 + +bb30: ; preds = %bb43, %bb29 + %i.2 = phi i64 [ 0, %bb29 ], [ %tmp44, %bb43 ] + %scevgep11 = getelementptr [512 x [512 x double]]* @A, i64 0, 
i64 %i.2, i64 %tmp68 + %scevgep12 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.2, i64 %tmp2 + %exitcond14 = icmp ne i64 %i.2, %smax13 + br i1 %exitcond14, label %bb31, label %bb45 + +bb31: ; preds = %bb30 + %tmp33 = load double* %scevgep12 + %tmp34 = load double* %scevgep11 + %tmp38 = fmul double %tmp33, %tmp34 + %tmp41 = load double* %scevgep31 + %tmp42 = fadd double %tmp41, %tmp38 + store double %tmp42, double* %scevgep31 + br label %bb43 + +bb43: ; preds = %bb31 + %tmp44 = add nsw i64 %i.2, 1 + br label %bb30 + +bb45: ; preds = %bb30 + br label %bb46 + +bb46: ; preds = %bb53, %bb45 + %i.3 = phi i64 [ 0, %bb45 ], [ %tmp54, %bb53 ] + %scevgep18 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.3, i64 %tmp68 + %scevgep19 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.3, i64 %tmp2 + %exitcond27 = icmp ne i64 %i.3, %smax26 + br i1 %exitcond27, label %bb47, label %bb55 + +bb47: ; preds = %bb46 + %tmp48 = load double* %scevgep18 + %tmp49 = load double* %scevgep19 + %tmp50 = load double* %scevgep31 + %tmp51 = fmul double %tmp49, %tmp50 + %tmp52 = fsub double %tmp48, %tmp51 + store double %tmp52, double* %scevgep18 + br label %bb53 + +bb53: ; preds = %bb47 + %tmp54 = add nsw i64 %i.3, 1 + br label %bb46 + +bb55: ; preds = %bb46 + br label %bb56 + +bb56: ; preds = %bb55 + %indvar.next = add i64 %indvar, 1 + br label %bb28 + +bb57: ; preds = %bb28 + br label %bb58 + +bb58: ; preds = %bb57 + %tmp59 = add nsw i64 %tmp2, 1 + br label %bb1 + +bb60: ; preds = %bb1 + ret void +} + +declare double @sqrt(double) nounwind readnone + +define i32 @main(i32 %argc, i8** %argv) nounwind { +bb: + call void (...)* @init_array() + %tmp = sext i32 512 to i64 + %tmp1 = sext i32 512 to i64 + call void @scop_func(i64 %tmp, i64 %tmp1) + call void @print_array(i32 %argc, i8** %argv) + ret i32 0 +} + +declare void @init_array(...) 
+ +declare void @print_array(i32, i8**) diff --git a/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_without_param.ll b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_without_param.ll new file mode 100644 index 00000000000..84d107964d6 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/gramschmidt/gramschmidt_without_param.ll @@ -0,0 +1,162 @@ +; RUN: opt %loadPolly %defaultOpts -polly-prepare -polly-cloog -analyze %s | FileCheck %s +; ModuleID = './linear-algebra/solvers/gramschmidt/gramschmidt_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +@nrm = common global double 0.000000e+00, align 8 +@A = common global [512 x [512 x double]] zeroinitializer, align 16 +@R = common global [512 x [512 x double]] zeroinitializer, align 16 +@Q = common global [512 x [512 x double]] zeroinitializer, align 16 + +define void @scop_func() nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb51, %bb + %tmp2 = phi i64 [ 0, %bb ], [ %tmp52, %bb51 ] + %tmp49 = mul i64 %tmp2, 513 + %tmp50 = add i64 %tmp49, 1 + %tmp53 = add i64 %tmp2, 1 + %tmp57 = mul i64 %tmp2, -1 + %tmp28 = add i64 %tmp57, 511 + %scevgep39 = getelementptr [512 x [512 x double]]* @R, i64 0, i64 0, i64 %tmp49 + %exitcond48 = icmp ne i64 %tmp2, 512 + br i1 %exitcond48, label %bb3, label %bb53 + +bb3: ; preds = %bb1 + store double 0.000000e+00, double* @nrm, align 8 + br label %bb4 + +bb4: ; preds = %bb11, %bb3 + %i.0 = phi i64 [ 0, %bb3 ], [ %tmp12, %bb11 ] + %scevgep = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.0, i64 %tmp2 + %exitcond2 = icmp ne i64 %i.0, 512 + br i1 %exitcond2, label %bb5, label %bb13 + +bb5: ; preds = %bb4 + %tmp6 = load double* %scevgep + %tmp7 = load double* %scevgep + %tmp8 = fmul double %tmp6, %tmp7 + %tmp9 = load double* @nrm, align 8 + %tmp10 = fadd double 
%tmp9, %tmp8 + store double %tmp10, double* @nrm, align 8 + br label %bb11 + +bb11: ; preds = %bb5 + %tmp12 = add nsw i64 %i.0, 1 + br label %bb4 + +bb13: ; preds = %bb4 + %tmp15 = load double* @nrm, align 8 + %tmp16 = call double @sqrt(double %tmp15) + store double %tmp16, double* %scevgep39 + br label %bb17 + +bb17: ; preds = %bb22, %bb13 + %i.1 = phi i64 [ 0, %bb13 ], [ %tmp23, %bb22 ] + %scevgep3 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.1, i64 %tmp2 + %scevgep2 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.1, i64 %tmp2 + %exitcond = icmp ne i64 %i.1, 512 + br i1 %exitcond, label %bb18, label %bb24 + +bb18: ; preds = %bb17 + %tmp19 = load double* %scevgep3 + %tmp20 = load double* %scevgep39 + %tmp21 = fdiv double %tmp19, %tmp20 + store double %tmp21, double* %scevgep2 + br label %bb22 + +bb22: ; preds = %bb18 + %tmp23 = add nsw i64 %i.1, 1 + br label %bb17 + +bb24: ; preds = %bb17 + br label %bb25 + +bb25: ; preds = %bb49, %bb24 + %indvar = phi i64 [ %indvar.next, %bb49 ], [ 0, %bb24 ] + %tmp51 = add i64 %tmp50, %indvar + %scevgep23 = getelementptr [512 x [512 x double]]* @R, i64 0, i64 0, i64 %tmp51 + %tmp54 = add i64 %tmp53, %indvar + %exitcond29 = icmp ne i64 %indvar, %tmp28 + br i1 %exitcond29, label %bb26, label %bb50 + +bb26: ; preds = %bb25 + store double 0.000000e+00, double* %scevgep23 + br label %bb27 + +bb27: ; preds = %bb36, %bb26 + %i.2 = phi i64 [ 0, %bb26 ], [ %tmp37, %bb36 ] + %scevgep6 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.2, i64 %tmp54 + %scevgep7 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.2, i64 %tmp2 + %exitcond9 = icmp ne i64 %i.2, 512 + br i1 %exitcond9, label %bb28, label %bb38 + +bb28: ; preds = %bb27 + %tmp30 = load double* %scevgep7 + %tmp31 = load double* %scevgep6 + %tmp33 = fmul double %tmp30, %tmp31 + %tmp34 = load double* %scevgep23 + %tmp35 = fadd double %tmp34, %tmp33 + store double %tmp35, double* %scevgep23 + br label %bb36 + +bb36: ; preds = %bb28 + %tmp37 = add 
nsw i64 %i.2, 1 + br label %bb27 + +bb38: ; preds = %bb27 + br label %bb39 + +bb39: ; preds = %bb46, %bb38 + %i.3 = phi i64 [ 0, %bb38 ], [ %tmp47, %bb46 ] + %scevgep11 = getelementptr [512 x [512 x double]]* @A, i64 0, i64 %i.3, i64 %tmp54 + %scevgep12 = getelementptr [512 x [512 x double]]* @Q, i64 0, i64 %i.3, i64 %tmp2 + %exitcond16 = icmp ne i64 %i.3, 512 + br i1 %exitcond16, label %bb40, label %bb48 + +bb40: ; preds = %bb39 + %tmp41 = load double* %scevgep11 + %tmp42 = load double* %scevgep12 + %tmp43 = load double* %scevgep23 + %tmp44 = fmul double %tmp42, %tmp43 + %tmp45 = fsub double %tmp41, %tmp44 + store double %tmp45, double* %scevgep11 + br label %bb46 + +bb46: ; preds = %bb40 + %tmp47 = add nsw i64 %i.3, 1 + br label %bb39 + +bb48: ; preds = %bb39 + br label %bb49 + +bb49: ; preds = %bb48 + %indvar.next = add i64 %indvar, 1 + br label %bb25 + +bb50: ; preds = %bb25 + br label %bb51 + +bb51: ; preds = %bb50 + %tmp52 = add nsw i64 %tmp2, 1 + br label %bb1 + +bb53: ; preds = %bb1 + ret void +} + +declare double @sqrt(double) nounwind readnone + +define i32 @main(i32 %argc, i8** %argv) nounwind { +bb: + call void (...)* @init_array() + call void @scop_func() + call void @print_array(i32 %argc, i8** %argv) + ret i32 0 +} + +declare void @init_array(...) + +declare void @print_array(i32, i8**) +; CHECK: for region: 'bb1 => bb53' in function 'scop_func': diff --git a/polly/test/polybench/linear-algebra/solvers/lu/lu.c b/polly/test/polybench/linear-algebra/solvers/lu/lu.c new file mode 100755 index 00000000000..d5dcc91abd6 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/lu/lu.c @@ -0,0 +1,110 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef N +# define N 1024 +#endif + +/* Default data type is double. 
*/ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[N][N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < M; ++i) + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + A[i][j] = ((DATA_TYPE) i*j + 1) / N; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, A[i][j]); + if ((i * N + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long i, j, k; + +#pragma scop +#pragma live-out A + + for (k = 0; k < n; k++) + { + for (j = k + 1; j < n; j++) + A[k][j] = A[k][j] / A[k][k]; + for(i = k + 1; i < n; i++) + for (j = k + 1; j < n; j++) + A[i][j] = A[i][j] - A[i][k] * A[k][j]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/solvers/lu/lu_with_param.ll b/polly/test/polybench/linear-algebra/solvers/lu/lu_with_param.ll new file mode 100644 index 00000000000..f122643894b --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/lu/lu_with_param.ll @@ -0,0 +1,97 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; region-simplify make polly fail to detect the canonical induction variable. +; XFAIL:* + +; ModuleID = './linear-algebra/solvers/lu/lu_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %n) nounwind { +entry: + %0 = icmp sgt i64 %n, 0 + br i1 %0, label %bb.nph28, label %return + +bb1: ; preds = %bb1.preheader, %bb1 + %indvar = phi i64 [ %indvar.next, %bb1 ], [ 0, %bb1.preheader ] + %tmp27 = add i64 %tmp26, %indvar + %scevgep = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp27 + %1 = load double* %scevgep, align 8 + %2 = load double* %scevgep69, align 8 + %3 = fdiv double %1, %2 + store double %3, double* %scevgep, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond20 = icmp eq i64 %indvar.next, %tmp1 + br i1 %exitcond20, label %bb8.loopexit, label %bb1 + +bb5: ; preds = %bb6.preheader, %bb5 + %indvar34 = phi 
i64 [ %indvar.next35, %bb5 ], [ 0, %bb6.preheader ] + %tmp34 = add i64 %tmp26, %indvar34 + %scevgep45 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp32, i64 %tmp34 + %scevgep46 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp34 + %4 = load double* %scevgep45, align 8 + %5 = load double* %scevgep55, align 8 + %6 = load double* %scevgep46, align 8 + %7 = fmul double %5, %6 + %8 = fsub double %4, %7 + store double %8, double* %scevgep45, align 8 + %indvar.next35 = add i64 %indvar34, 1 + %exitcond2 = icmp eq i64 %indvar.next35, %tmp1 + br i1 %exitcond2, label %bb8.loopexit4, label %bb5 + +bb8.loopexit: ; preds = %bb1 + br i1 %10, label %bb6.preheader.preheader, label %bb9 + +bb6.preheader.preheader: ; preds = %bb8.loopexit + br label %bb6.preheader + +bb8.loopexit4: ; preds = %bb5 + %exitcond11 = icmp eq i64 %tmp57, %tmp1 + br i1 %exitcond11, label %bb9.loopexit, label %bb6.preheader + +bb6.preheader: ; preds = %bb6.preheader.preheader, %bb8.loopexit4 + %indvar39 = phi i64 [ %tmp57, %bb8.loopexit4 ], [ 0, %bb6.preheader.preheader ] + %tmp32 = add i64 %indvar39, 1 + %scevgep55 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp32, i64 %tmp25 + %tmp57 = add i64 %indvar39, 1 + br label %bb5 + +bb9.loopexit: ; preds = %bb8.loopexit4 + br label %bb9 + +bb9: ; preds = %bb9.loopexit, %bb2.preheader, %bb8.loopexit + %exitcond = icmp eq i64 %9, %n + br i1 %exitcond, label %return.loopexit, label %bb2.preheader + +bb.nph28: ; preds = %entry + %tmp29 = add i64 %n, -1 + br label %bb2.preheader + +bb2.preheader: ; preds = %bb.nph28, %bb9 + %storemerge17 = phi i64 [ 0, %bb.nph28 ], [ %9, %bb9 ] + %tmp25 = mul i64 %storemerge17, 1025 + %tmp26 = add i64 %tmp25, 1 + %tmp30 = mul i64 %storemerge17, -1 + %tmp1 = add i64 %tmp29, %tmp30 + %storemerge15 = add i64 %storemerge17, 1 + %scevgep69 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp25 + %9 = add i64 %storemerge17, 1 + %10 = icmp slt i64 %storemerge15, %n + br i1 %10, 
label %bb1.preheader, label %bb9 + +bb1.preheader: ; preds = %bb2.preheader + br label %bb1 + +return.loopexit: ; preds = %bb9 + br label %return + +return: ; preds = %return.loopexit, %entry + ret void +} +; CHECK: Valid Region for Scop: entry.split => return diff --git a/polly/test/polybench/linear-algebra/solvers/lu/lu_without_param.ll b/polly/test/polybench/linear-algebra/solvers/lu/lu_without_param.ll new file mode 100644 index 00000000000..93a3c1b214e --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/lu/lu_without_param.ll @@ -0,0 +1,89 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; region-simplify make polly fail to detect the canonical induction variable. +; XFAIL:* + +; ModuleID = './linear-algebra/solvers/lu/lu_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph28: + br label %bb2.preheader + +bb1: ; preds = %bb1.preheader, %bb1 + %indvar = phi i64 [ %indvar.next, %bb1 ], [ 0, %bb1.preheader ] + %tmp27 = add i64 %tmp26, %indvar + %scevgep = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp27 + %0 = load double* %scevgep, align 8 + %1 = load double* %scevgep68, align 8 + %2 = fdiv double %0, %1 + store double %2, double* %scevgep, align 8 + %indvar.next = add i64 %indvar, 1 + %exitcond20 = icmp eq i64 %indvar.next, 
%tmp1 + br i1 %exitcond20, label %bb8.loopexit, label %bb1 + +bb5: ; preds = %bb6.preheader, %bb5 + %indvar33 = phi i64 [ %indvar.next34, %bb5 ], [ 0, %bb6.preheader ] + %tmp33 = add i64 %tmp26, %indvar33 + %scevgep44 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp31, i64 %tmp33 + %scevgep45 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp33 + %3 = load double* %scevgep44, align 8 + %4 = load double* %scevgep54, align 8 + %5 = load double* %scevgep45, align 8 + %6 = fmul double %4, %5 + %7 = fsub double %3, %6 + store double %7, double* %scevgep44, align 8 + %indvar.next34 = add i64 %indvar33, 1 + %exitcond2 = icmp eq i64 %indvar.next34, %tmp1 + br i1 %exitcond2, label %bb8.loopexit4, label %bb5 + +bb8.loopexit: ; preds = %bb1 + br i1 %9, label %bb6.preheader.preheader, label %bb9 + +bb6.preheader.preheader: ; preds = %bb8.loopexit + br label %bb6.preheader + +bb8.loopexit4: ; preds = %bb5 + %exitcond11 = icmp eq i64 %tmp56, %tmp1 + br i1 %exitcond11, label %bb9.loopexit, label %bb6.preheader + +bb6.preheader: ; preds = %bb6.preheader.preheader, %bb8.loopexit4 + %indvar38 = phi i64 [ %tmp56, %bb8.loopexit4 ], [ 0, %bb6.preheader.preheader ] + %tmp31 = add i64 %indvar38, 1 + %scevgep54 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp31, i64 %tmp25 + %tmp56 = add i64 %indvar38, 1 + br label %bb5 + +bb9.loopexit: ; preds = %bb8.loopexit4 + br label %bb9 + +bb9: ; preds = %bb9.loopexit, %bb2.preheader, %bb8.loopexit + %exitcond = icmp eq i64 %8, 1024 + br i1 %exitcond, label %return, label %bb2.preheader + +bb2.preheader: ; preds = %bb9, %bb.nph28 + %storemerge17 = phi i64 [ 0, %bb.nph28 ], [ %8, %bb9 ] + %tmp25 = mul i64 %storemerge17, 1025 + %tmp26 = add i64 %tmp25, 1 + %tmp29 = mul i64 %storemerge17, -1 + %tmp1 = add i64 %tmp29, 1023 + %storemerge15 = add i64 %storemerge17, 1 + %scevgep68 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 0, i64 %tmp25 + %8 = add i64 %storemerge17, 1 + %9 = icmp slt i64 
%storemerge15, 1024 + br i1 %9, label %bb1.preheader, label %bb9 + +bb1.preheader: ; preds = %bb2.preheader + br label %bb1 + +return: ; preds = %bb9 + ret void +} +; CHECK: Valid Region for Scop: bb2.preheader => return diff --git a/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp.c b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp.c new file mode 100755 index 00000000000..c5fdb484216 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp.c @@ -0,0 +1,145 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. */ +#ifndef N +# define N 1024 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +DATA_TYPE w; +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE a[N+1][N+1]; +DATA_TYPE x[N+1]; +DATA_TYPE y[N+1]; +DATA_TYPE b[N+1]; +#else +DATA_TYPE** a = (DATA_TYPE**)malloc((N + 1) * sizeof(DATA_TYPE*)); +DATA_TYPE* x = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); +DATA_TYPE* y = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); +DATA_TYPE* b = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i <= N; ++i) + a[i] = (DATA_TYPE*)malloc((N + 1) * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i <= N; i++) + { + x[i] = ((DATA_TYPE) i + 1) / N; + b[i] = ((DATA_TYPE) i + 2) / N; + for (j = 0; j <= N; j++) + a[i][j] = ((DATA_TYPE) i*j + 1) / N; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i <= N; i++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, x[i]); + if (i % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long i, j, k; +#pragma scop +#pragma live-out x + + b[0] = 1.0; + for (i = 0; i < n; i++) + { + for (j = i+1; j <= n; j++) + { + w = a[j][i]; + for (k = 0; k < i; k++) + w = w- a[j][k] * a[k][i]; + a[j][i] = w / a[i][i]; + } + for (j = i+1; j <= n; j++) + { + w = a[i+1][j]; + for (k = 0; k <= i; k++) + w = w - a[i+1][k] * a[k][j]; + a[i+1][j] = w; + } + } + y[0] = b[0]; + for (i = 1; i <= n; i++) + { + w = b[i]; + for (j = 0; j < i; j++) + w = w - a[i][j] * y[j]; + y[i] = w; + } + x[n] = y[n] / a[n][n]; + for (i = 0; i <= n - 1; i++) + { + w = y[n - 1 - (i)]; + for (j = n - i; j <= n; j++) + w = w - a[n - 1 - i][j] * x[j]; + x[n - 1 - i] = w / a[n - 1 - (i)][n - 1-(i)]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int i, j, k; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. 
*/ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_with_param.ll b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_with_param.ll new file mode 100644 index 00000000000..ef2e03ec378 --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_with_param.ll @@ -0,0 +1,301 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './linear-algebra/solvers/ludcmp/ludcmp_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@x = common global [1025 x double] zeroinitializer, align 32 +@b = common global [1025 x double] zeroinitializer, align 32 +@a = common global [1025 x [1025 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@w = common global double 0.000000e+00 +@y = common global [1025 x double] zeroinitializer, align 32 + +define void @scop_func(i64 %n) nounwind { +entry: + store double 1.000000e+00, double* getelementptr inbounds ([1025 x double]* @b, i64 0, i64 0), align 32 + %0 = icmp sgt i64 %n, 0 + br i1 %0, label %bb.nph81, label %bb14 + +bb.nph43: ; preds = %bb5.preheader + %1 = icmp sgt i64 %storemerge60, 0 + br i1 %1, label %bb.nph43.split.us, label %bb4.preheader + +bb4.preheader: ; preds = %bb.nph43 + br label %bb4 + +bb.nph43.split.us: ; preds = %bb.nph43 + br label %bb.nph35.us + 
+bb4.us: ; preds = %bb2.us + %.lcssa63 = phi double [ %9, %bb2.us ] + %2 = load double* %scevgep110, align 8 + %3 = fdiv double %.lcssa63, %2 + store double %3, double* %scevgep148, align 8 + %4 = icmp sgt i64 %storemerge5.us, %n + br i1 %4, label %bb11.loopexit.loopexit1, label %bb.nph35.us + +bb2.us: ; preds = %bb.nph35.us, %bb2.us + %5 = phi double [ %11, %bb.nph35.us ], [ %9, %bb2.us ] + %storemerge834.us = phi i64 [ 0, %bb.nph35.us ], [ %10, %bb2.us ] + %scevgep141 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp96, i64 %storemerge834.us + %scevgep136 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge834.us, i64 %storemerge60 + %6 = load double* %scevgep141, align 8 + %7 = load double* %scevgep136, align 8 + %8 = fmul double %6, %7 + %9 = fsub double %5, %8 + %10 = add nsw i64 %storemerge834.us, 1 + %exitcond64 = icmp eq i64 %10, %storemerge60 + br i1 %exitcond64, label %bb4.us, label %bb2.us + +bb.nph35.us: ; preds = %bb4.us, %bb.nph43.split.us + %indvar137 = phi i64 [ %tmp146, %bb4.us ], [ 0, %bb.nph43.split.us ] + %storemerge5.us = add i64 %tmp, %indvar137 + %tmp93 = add i64 %indvar137, 1 + %scevgep148 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp93, i64 %tmp87 + %tmp96 = add i64 %storemerge538, %indvar137 + %tmp146 = add i64 %indvar137, 1 + %11 = load double* %scevgep148, align 8 + br label %bb2.us + +bb4: ; preds = %bb4.preheader, %bb4 + %indvar152 = phi i64 [ %indvar.next153, %bb4 ], [ 0, %bb4.preheader ] + %tmp99 = add i64 %indvar152, 1 + %scevgep157 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp99, i64 %tmp87 + %storemerge5 = add i64 %tmp, %indvar152 + %12 = load double* %scevgep157, align 8 + %13 = load double* %scevgep110, align 8 + %14 = fdiv double %12, %13 + store double %14, double* %scevgep157, align 8 + %15 = icmp sgt i64 %storemerge5, %n + %indvar.next153 = add i64 %indvar152, 1 + br i1 %15, label %bb11.loopexit.loopexit, label %bb4 + +bb.nph56: ; preds = %bb11.loopexit + br i1 
false, label %bb10.us.preheader, label %bb.nph47.preheader + +bb10.us.preheader: ; preds = %bb.nph56 + br label %bb10.us + +bb.nph47.preheader: ; preds = %bb.nph56 + br label %bb.nph47 + +bb10.us: ; preds = %bb10.us.preheader, %bb10.us + %indvar122 = phi i64 [ %indvar.next123, %bb10.us ], [ 0, %bb10.us.preheader ] + %storemerge6.us = add i64 %tmp, %indvar122 + %tmp89 = add i64 %tmp88, %indvar122 + %scevgep128 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1, i64 %tmp89 + %16 = load double* %scevgep128, align 8 + store double %16, double* %scevgep128, align 8 + %17 = icmp sgt i64 %storemerge6.us, %n + %indvar.next123 = add i64 %indvar122, 1 + br i1 %17, label %bb13.loopexit.loopexit2, label %bb10.us + +bb.nph47: ; preds = %bb.nph47.preheader, %bb10 + %indvar162 = phi i64 [ %indvar.next163, %bb10 ], [ 0, %bb.nph47.preheader ] + %storemerge6 = add i64 %tmp, %indvar162 + %tmp104 = add i64 %tmp88, %indvar162 + %scevgep180 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1, i64 %tmp104 + %tmp107 = add i64 %storemerge538, %indvar162 + %18 = load double* %scevgep180, align 8 + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph47 + %w.tmp.048 = phi double [ %18, %bb.nph47 ], [ %22, %bb8 ] + %storemerge746 = phi i64 [ 0, %bb.nph47 ], [ %23, %bb8 ] + %scevgep166 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge746, i64 %tmp107 + %scevgep167 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge538, i64 %storemerge746 + %19 = load double* %scevgep167, align 8 + %20 = load double* %scevgep166, align 8 + %21 = fmul double %19, %20 + %22 = fsub double %w.tmp.048, %21 + %23 = add nsw i64 %storemerge746, 1 + %exitcond = icmp eq i64 %23, %smax + br i1 %exitcond, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %.lcssa40 = phi double [ %22, %bb8 ] + store double %.lcssa40, double* %scevgep180, align 8 + %24 = icmp sgt i64 %storemerge6, %n + %indvar.next163 = add i64 %indvar162, 1 + br i1 %24, label %bb13.loopexit.loopexit, label 
%bb.nph47 + +bb11.loopexit.loopexit: ; preds = %bb4 + %.lcssa57 = phi double [ %12, %bb4 ] + br label %bb11.loopexit + +bb11.loopexit.loopexit1: ; preds = %bb4.us + %.lcssa63.lcssa = phi double [ %.lcssa63, %bb4.us ] + br label %bb11.loopexit + +bb11.loopexit: ; preds = %bb11.loopexit.loopexit1, %bb11.loopexit.loopexit, %bb5.preheader + %w.tmp.082 = phi double [ %w.tmp.1, %bb5.preheader ], [ %.lcssa57, %bb11.loopexit.loopexit ], [ %.lcssa63.lcssa, %bb11.loopexit.loopexit1 ] + %25 = icmp sgt i64 %storemerge538, %n + br i1 %25, label %bb13.loopexit, label %bb.nph56 + +bb13.loopexit.loopexit: ; preds = %bb10 + %.lcssa40.lcssa = phi double [ %.lcssa40, %bb10 ] + br label %bb13.loopexit + +bb13.loopexit.loopexit2: ; preds = %bb10.us + %.lcssa77 = phi double [ %16, %bb10.us ] + br label %bb13.loopexit + +bb13.loopexit: ; preds = %bb13.loopexit.loopexit2, %bb13.loopexit.loopexit, %bb11.loopexit + %w.tmp.2 = phi double [ %w.tmp.082, %bb11.loopexit ], [ %.lcssa40.lcssa, %bb13.loopexit.loopexit ], [ %.lcssa77, %bb13.loopexit.loopexit2 ] + %indvar.next42 = add i64 %storemerge60, 1 + %exitcond84 = icmp ne i64 %indvar.next42, %n + br i1 %exitcond84, label %bb5.preheader, label %bb13.bb14_crit_edge + +bb13.bb14_crit_edge: ; preds = %bb13.loopexit + %w.tmp.2.lcssa = phi double [ %w.tmp.2, %bb13.loopexit ] + store double %w.tmp.2.lcssa, double* @w + br label %bb14 + +bb.nph81: ; preds = %entry + %w.promoted = load double* @w + br label %bb5.preheader + +bb5.preheader: ; preds = %bb.nph81, %bb13.loopexit + %storemerge60 = phi i64 [ 0, %bb.nph81 ], [ %indvar.next42, %bb13.loopexit ] + %w.tmp.1 = phi double [ %w.promoted, %bb.nph81 ], [ %w.tmp.2, %bb13.loopexit ] + %tmp = add i64 %storemerge60, 2 + %tmp87 = mul i64 %storemerge60, 1026 + %tmp88 = add i64 %tmp87, 1 + %storemerge538 = add i64 %storemerge60, 1 + %scevgep110 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 0, i64 %tmp87 + %tmp44 = icmp sgt i64 %storemerge538, 1 + %smax = select i1 %tmp44, i64 %storemerge538, i64 1 
+ %26 = icmp sgt i64 %storemerge538, %n + br i1 %26, label %bb11.loopexit, label %bb.nph43 + +bb14: ; preds = %bb13.bb14_crit_edge, %entry + store double 1.000000e+00, double* getelementptr inbounds ([1025 x double]* @y, i64 0, i64 0), align 32 + %27 = icmp slt i64 %n, 1 + br i1 %27, label %bb20, label %bb15.preheader + +bb15.preheader: ; preds = %bb14 + br label %bb15 + +bb15: ; preds = %bb15.preheader, %bb18 + %indvar111 = phi i64 [ %28, %bb18 ], [ 0, %bb15.preheader ] + %storemerge126 = add i64 %indvar111, 1 + %tmp117 = add i64 %indvar111, 2 + %scevgep119 = getelementptr [1025 x double]* @b, i64 0, i64 %storemerge126 + %scevgep118 = getelementptr [1025 x double]* @y, i64 0, i64 %storemerge126 + %28 = add i64 %indvar111, 1 + %29 = load double* %scevgep119, align 8 + %30 = icmp sgt i64 %storemerge126, 0 + br i1 %30, label %bb16.preheader, label %bb18 + +bb16.preheader: ; preds = %bb15 + br label %bb16 + +bb16: ; preds = %bb16.preheader, %bb16 + %31 = phi double [ %35, %bb16 ], [ %29, %bb16.preheader ] + %storemerge423 = phi i64 [ %36, %bb16 ], [ 0, %bb16.preheader ] + %scevgep114 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge126, i64 %storemerge423 + %scevgep113 = getelementptr [1025 x double]* @y, i64 0, i64 %storemerge423 + %32 = load double* %scevgep114, align 8 + %33 = load double* %scevgep113, align 8 + %34 = fmul double %32, %33 + %35 = fsub double %31, %34 + %36 = add nsw i64 %storemerge423, 1 + %exitcond4 = icmp eq i64 %36, %storemerge126 + br i1 %exitcond4, label %bb18.loopexit, label %bb16 + +bb18.loopexit: ; preds = %bb16 + %.lcssa = phi double [ %35, %bb16 ] + br label %bb18 + +bb18: ; preds = %bb18.loopexit, %bb15 + %w.tmp.032 = phi double [ %29, %bb15 ], [ %.lcssa, %bb18.loopexit ] + store double %w.tmp.032, double* %scevgep118, align 8 + %37 = icmp sgt i64 %tmp117, %n + br i1 %37, label %bb19.bb20_crit_edge, label %bb15 + +bb19.bb20_crit_edge: ; preds = %bb18 + %w.tmp.032.lcssa = phi double [ %w.tmp.032, %bb18 ] + store double 
%w.tmp.032.lcssa, double* @w + br label %bb20 + +bb20: ; preds = %bb19.bb20_crit_edge, %bb14 + %38 = getelementptr inbounds [1025 x double]* @y, i64 0, i64 %n + %39 = load double* %38, align 8 + %40 = getelementptr inbounds [1025 x [1025 x double]]* @a, i64 0, i64 %n, i64 %n + %41 = load double* %40, align 8 + %42 = fdiv double %39, %41 + %43 = getelementptr inbounds [1025 x double]* @x, i64 0, i64 %n + store double %42, double* %43, align 8 + %44 = add nsw i64 %n, -1 + %45 = icmp slt i64 %44, 0 + br i1 %45, label %return, label %bb.nph19 + +bb.nph19: ; preds = %bb20 + %tmp86 = mul i64 %n, 1026 + %tmp90 = add i64 %n, 1 + %tmp94 = add i64 %tmp86, -1 + %tmp34 = add i64 %n, -1 + br label %bb21 + +bb21: ; preds = %bb24, %bb.nph19 + %storemerge211 = phi i64 [ 0, %bb.nph19 ], [ %46, %bb24 ] + %tmp23 = mul i64 %storemerge211, -1026 + %tmp24 = add i64 %tmp86, %tmp23 + %tmp27 = mul i64 %storemerge211, -1 + %tmp106 = add i64 %n, %tmp27 + %tmp31 = add i64 %tmp90, %tmp27 + %tmp109 = add i64 %storemerge211, 1 + %tmp35 = add i64 %tmp34, %tmp27 + %scevgep100 = getelementptr [1025 x double]* @y, i64 0, i64 %tmp35 + %scevgep99 = getelementptr [1025 x double]* @x, i64 0, i64 %tmp35 + %tmp38 = add i64 %tmp94, %tmp23 + %scevgep96 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 -1, i64 %tmp38 + %46 = add i64 %storemerge211, 1 + %47 = load double* %scevgep100, align 8 + %48 = icmp sgt i64 %tmp106, %n + br i1 %48, label %bb24, label %bb22.preheader + +bb22.preheader: ; preds = %bb21 + br label %bb22 + +bb22: ; preds = %bb22.preheader, %bb22 + %indvar = phi i64 [ %indvar.next, %bb22 ], [ 0, %bb22.preheader ] + %w.tmp.0 = phi double [ %52, %bb22 ], [ %47, %bb22.preheader ] + %tmp25 = add i64 %tmp24, %indvar + %scevgep89 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 -1, i64 %tmp25 + %tmp29 = add i64 %tmp106, %indvar + %scevgep = getelementptr [1025 x double]* @x, i64 0, i64 %tmp29 + %tmp92 = add i64 %tmp31, %indvar + %49 = load double* %scevgep89, align 8 + %50 = load 
double* %scevgep, align 8 + %51 = fmul double %49, %50 + %52 = fsub double %w.tmp.0, %51 + %53 = icmp sgt i64 %tmp92, %n + %indvar.next = add i64 %indvar, 1 + br i1 %53, label %bb24.loopexit, label %bb22 + +bb24.loopexit: ; preds = %bb22 + %.lcssa12 = phi double [ %52, %bb22 ] + br label %bb24 + +bb24: ; preds = %bb24.loopexit, %bb21 + %w.tmp.021 = phi double [ %47, %bb21 ], [ %.lcssa12, %bb24.loopexit ] + %54 = load double* %scevgep96, align 8 + %55 = fdiv double %w.tmp.021, %54 + store double %55, double* %scevgep99, align 8 + %56 = icmp slt i64 %44, %tmp109 + br i1 %56, label %bb25.return_crit_edge, label %bb21 + +bb25.return_crit_edge: ; preds = %bb24 + %w.tmp.021.lcssa = phi double [ %w.tmp.021, %bb24 ] + store double %w.tmp.021.lcssa, double* @w + ret void + +return: ; preds = %bb20 + ret void +} diff --git a/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_without_param.ll b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_without_param.ll new file mode 100644 index 00000000000..2fa70af6d8e --- /dev/null +++ b/polly/test/polybench/linear-algebra/solvers/ludcmp/ludcmp_without_param.ll @@ -0,0 +1,249 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -polly-cloog -analyze %s | FileCheck %s +; region-simplify make polly fail to detect the canonical induction variable. 
+; XFAIL:* + +; ModuleID = './linear-algebra/solvers/ludcmp/ludcmp_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@x = common global [1025 x double] zeroinitializer, align 32 +@b = common global [1025 x double] zeroinitializer, align 32 +@a = common global [1025 x [1025 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 +@w = common global double 0.000000e+00 +@y = common global [1025 x double] zeroinitializer, align 32 + +define void @scop_func() nounwind { +bb.nph76: + store double 1.000000e+00, double* getelementptr inbounds ([1025 x double]* @b, i64 0, i64 0), align 32 + %w.promoted = load double* @w + br label %bb5.preheader + +bb.nph38: ; preds = %bb5.preheader + %0 = icmp sgt i64 %storemerge55, 0 + br i1 %0, label %bb.nph38.split.us, label %bb4.preheader + +bb4.preheader: ; preds = %bb.nph38 + br label %bb4 + +bb.nph38.split.us: ; preds = %bb.nph38 + br label %bb.nph30.us + +bb4.us: ; preds = %bb2.us + %.lcssa62 = phi double [ %7, %bb2.us ] + %1 = load double* %scevgep109, align 8 + %2 = fdiv double %.lcssa62, %1 + store double %2, double* %scevgep141, align 8 + %exitcond70 = icmp eq i64 %tmp139, %tmp46 + br i1 %exitcond70, label %bb11.loopexit.loopexit1, label %bb.nph30.us + +bb2.us: ; preds = %bb.nph30.us, %bb2.us + %3 = phi double [ %9, %bb.nph30.us ], [ %7, %bb2.us ] + %storemerge829.us = phi i64 [ 0, %bb.nph30.us ], [ %8, %bb2.us ] + %scevgep134 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 
%tmp95, i64 %storemerge829.us + %scevgep129 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge829.us, i64 %storemerge55 + %4 = load double* %scevgep134, align 8 + %5 = load double* %scevgep129, align 8 + %6 = fmul double %4, %5 + %7 = fsub double %3, %6 + %8 = add nsw i64 %storemerge829.us, 1 + %exitcond63 = icmp eq i64 %8, %storemerge55 + br i1 %exitcond63, label %bb4.us, label %bb2.us + +bb.nph30.us: ; preds = %bb4.us, %bb.nph38.split.us + %indvar130 = phi i64 [ %tmp139, %bb4.us ], [ 0, %bb.nph38.split.us ] + %tmp92 = add i64 %indvar130, 1 + %scevgep141 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp92, i64 %tmp86 + %tmp95 = add i64 %storemerge533, %indvar130 + %tmp139 = add i64 %indvar130, 1 + %9 = load double* %scevgep141, align 8 + br label %bb2.us + +bb4: ; preds = %bb4.preheader, %bb4 + %indvar145 = phi i64 [ %indvar.next146, %bb4 ], [ 0, %bb4.preheader ] + %tmp99 = add i64 %indvar145, 1 + %scevgep150 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp99, i64 %tmp86 + %10 = load double* %scevgep150, align 8 + %11 = load double* %scevgep109, align 8 + %12 = fdiv double %10, %11 + store double %12, double* %scevgep150, align 8 + %indvar.next146 = add i64 %indvar145, 1 + %exitcond58 = icmp eq i64 %indvar.next146, %tmp46 + br i1 %exitcond58, label %bb11.loopexit.loopexit, label %bb4 + +bb.nph51: ; preds = %bb11.loopexit + br i1 false, label %bb10.us.preheader, label %bb.nph42.preheader + +bb10.us.preheader: ; preds = %bb.nph51 + br label %bb10.us + +bb.nph42.preheader: ; preds = %bb.nph51 + br label %bb.nph42 + +bb10.us: ; preds = %bb10.us.preheader, %bb10.us + %indvar114 = phi i64 [ %indvar.next115, %bb10.us ], [ 0, %bb10.us.preheader ] + %tmp88 = add i64 %tmp87, %indvar114 + %scevgep121 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1, i64 %tmp88 + %13 = load double* %scevgep121, align 8 + store double %13, double* %scevgep121, align 8 + %indvar.next115 = add i64 %indvar114, 1 + %exitcond80 = icmp eq i64 
%indvar.next115, %tmp46 + br i1 %exitcond80, label %bb13.loopexit.loopexit2, label %bb10.us + +bb.nph42: ; preds = %bb.nph42.preheader, %bb10 + %indvar155 = phi i64 [ %indvar.next156, %bb10 ], [ 0, %bb.nph42.preheader ] + %tmp102 = add i64 %tmp87, %indvar155 + %scevgep173 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1, i64 %tmp102 + %tmp104 = add i64 %storemerge533, %indvar155 + %14 = load double* %scevgep173, align 8 + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph42 + %w.tmp.043 = phi double [ %14, %bb.nph42 ], [ %18, %bb8 ] + %storemerge741 = phi i64 [ 0, %bb.nph42 ], [ %19, %bb8 ] + %scevgep159 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge741, i64 %tmp104 + %scevgep160 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %storemerge533, i64 %storemerge741 + %15 = load double* %scevgep160, align 8 + %16 = load double* %scevgep159, align 8 + %17 = fmul double %15, %16 + %18 = fsub double %w.tmp.043, %17 + %19 = add nsw i64 %storemerge741, 1 + %exitcond41 = icmp eq i64 %19, %storemerge533 + br i1 %exitcond41, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %.lcssa37 = phi double [ %18, %bb8 ] + store double %.lcssa37, double* %scevgep173, align 8 + %indvar.next156 = add i64 %indvar155, 1 + %exitcond47 = icmp eq i64 %indvar.next156, %tmp46 + br i1 %exitcond47, label %bb13.loopexit.loopexit, label %bb.nph42 + +bb11.loopexit.loopexit: ; preds = %bb4 + %.lcssa55 = phi double [ %10, %bb4 ] + br label %bb11.loopexit + +bb11.loopexit.loopexit1: ; preds = %bb4.us + %.lcssa62.lcssa = phi double [ %.lcssa62, %bb4.us ] + br label %bb11.loopexit + +bb11.loopexit: ; preds = %bb11.loopexit.loopexit1, %bb11.loopexit.loopexit, %bb5.preheader + %w.tmp.077 = phi double [ %w.tmp.1, %bb5.preheader ], [ %.lcssa55, %bb11.loopexit.loopexit ], [ %.lcssa62.lcssa, %bb11.loopexit.loopexit1 ] + br i1 false, label %bb13.loopexit, label %bb.nph51 + +bb13.loopexit.loopexit: ; preds = %bb10 + %.lcssa37.lcssa = phi double [ %.lcssa37, %bb10 ] + br label 
%bb13.loopexit + +bb13.loopexit.loopexit2: ; preds = %bb10.us + %.lcssa77 = phi double [ %13, %bb10.us ] + br label %bb13.loopexit + +bb13.loopexit: ; preds = %bb13.loopexit.loopexit2, %bb13.loopexit.loopexit, %bb11.loopexit + %w.tmp.2 = phi double [ %w.tmp.077, %bb11.loopexit ], [ %.lcssa37.lcssa, %bb13.loopexit.loopexit ], [ %.lcssa77, %bb13.loopexit.loopexit2 ] + %indvar.next39 = add i64 %storemerge55, 1 + %exitcond85 = icmp ne i64 %indvar.next39, 1024 + br i1 %exitcond85, label %bb5.preheader, label %bb.nph25 + +bb5.preheader: ; preds = %bb13.loopexit, %bb.nph76 + %storemerge55 = phi i64 [ %indvar.next39, %bb13.loopexit ], [ 0, %bb.nph76 ] + %w.tmp.1 = phi double [ %w.promoted, %bb.nph76 ], [ %w.tmp.2, %bb13.loopexit ] + %tmp86 = mul i64 %storemerge55, 1026 + %tmp87 = add i64 %tmp86, 1 + %tmp90 = mul i64 %storemerge55, -1 + %tmp46 = add i64 %tmp90, 1024 + %storemerge533 = add i64 %storemerge55, 1 + %scevgep109 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 0, i64 %tmp86 + br i1 false, label %bb11.loopexit, label %bb.nph38 + +bb.nph25: ; preds = %bb13.loopexit + %w.tmp.2.lcssa = phi double [ %w.tmp.2, %bb13.loopexit ] + store double %w.tmp.2.lcssa, double* @w + store double 1.000000e+00, double* getelementptr inbounds ([1025 x double]* @y, i64 0, i64 0), align 32 + br label %bb.nph19 + +bb.nph19: ; preds = %bb18, %bb.nph25 + %indvar102 = phi i64 [ 0, %bb.nph25 ], [ %tmp, %bb18 ] + %tmp29 = add i64 %indvar102, 1 + %scevgep111 = getelementptr [1025 x double]* @b, i64 0, i64 %tmp29 + %scevgep110 = getelementptr [1025 x double]* @y, i64 0, i64 %tmp29 + %tmp = add i64 %indvar102, 1 + %20 = load double* %scevgep111, align 8 + br label %bb16 + +bb16: ; preds = %bb16, %bb.nph19 + %21 = phi double [ %20, %bb.nph19 ], [ %25, %bb16 ] + %storemerge418 = phi i64 [ 0, %bb.nph19 ], [ %26, %bb16 ] + %scevgep106 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 %tmp29, i64 %storemerge418 + %scevgep105 = getelementptr [1025 x double]* @y, i64 0, i64 
%storemerge418 + %22 = load double* %scevgep106, align 8 + %23 = load double* %scevgep105, align 8 + %24 = fmul double %22, %23 + %25 = fsub double %21, %24 + %26 = add nsw i64 %storemerge418, 1 + %exitcond = icmp eq i64 %26, %tmp29 + br i1 %exitcond, label %bb18, label %bb16 + +bb18: ; preds = %bb16 + %.lcssa28 = phi double [ %25, %bb16 ] + store double %.lcssa28, double* %scevgep110, align 8 + %exitcond32 = icmp eq i64 %tmp, 1024 + br i1 %exitcond32, label %bb.nph14, label %bb.nph19 + +bb.nph14: ; preds = %bb18 + %.lcssa28.lcssa = phi double [ %.lcssa28, %bb18 ] + store double %.lcssa28.lcssa, double* @w + %27 = load double* getelementptr inbounds ([1025 x double]* @y, i64 0, i64 1024), align 32 + %28 = load double* getelementptr inbounds ([1025 x [1025 x double]]* @a, i64 0, i64 1024, i64 1024), align 32 + %29 = fdiv double %27, %28 + store double %29, double* getelementptr inbounds ([1025 x double]* @x, i64 0, i64 1024), align 32 + br label %bb.nph + +bb.nph: ; preds = %bb24, %bb.nph14 + %storemerge210 = phi i64 [ 0, %bb.nph14 ], [ %37, %bb24 ] + %tmp14 = mul i64 %storemerge210, -1026 + %tmp15 = add i64 %tmp14, 1024 + %tmp18 = mul i64 %storemerge210, -1 + %tmp19 = add i64 %tmp18, 1024 + %tmp3 = add i64 %storemerge210, 1 + %tmp23 = add i64 %tmp18, 1023 + %scevgep100 = getelementptr [1025 x double]* @y, i64 0, i64 %tmp23 + %scevgep99 = getelementptr [1025 x double]* @x, i64 0, i64 %tmp23 + %tmp26 = add i64 %tmp14, 1023 + %scevgep97 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1023, i64 %tmp26 + %30 = load double* %scevgep100, align 8 + br label %bb22 + +bb22: ; preds = %bb22, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb22 ] + %w.tmp.0 = phi double [ %30, %bb.nph ], [ %34, %bb22 ] + %tmp16 = add i64 %tmp15, %indvar + %scevgep83 = getelementptr [1025 x [1025 x double]]* @a, i64 0, i64 1023, i64 %tmp16 + %tmp20 = add i64 %tmp19, %indvar + %scevgep = getelementptr [1025 x double]* @x, i64 0, i64 %tmp20 + %31 = load double* %scevgep83, 
align 8 + %32 = load double* %scevgep, align 8 + %33 = fmul double %31, %32 + %34 = fsub double %w.tmp.0, %33 + %indvar.next = add i64 %indvar, 1 + %exitcond4 = icmp eq i64 %indvar.next, %tmp3 + br i1 %exitcond4, label %bb24, label %bb22 + +bb24: ; preds = %bb22 + %.lcssa = phi double [ %34, %bb22 ] + %35 = load double* %scevgep97, align 8 + %36 = fdiv double %.lcssa, %35 + store double %36, double* %scevgep99, align 8 + %37 = add nsw i64 %storemerge210, 1 + %exitcond13 = icmp eq i64 %37, 1024 + br i1 %exitcond13, label %return, label %bb.nph + +return: ; preds = %bb24 + %.lcssa.lcssa = phi double [ %.lcssa, %bb24 ] + store double %.lcssa.lcssa, double* @w + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/scripts/compile.sh b/polly/test/polybench/scripts/compile.sh new file mode 100755 index 00000000000..d3fffa2dd65 --- /dev/null +++ b/polly/test/polybench/scripts/compile.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +if [ $# -ne 3 ]; then + echo "Usage: compile.sh <compiler command> <input file> <output file>"; + exit 1; +fi; + +COMPILER_COMMAND="$1"; +INPUT_FILE="$2"; +OUTPUT_FILE="$3"; + +$COMPILER_COMMAND -DPOLYBENCH_TIME -lm -I utilities utilities/instrument.c $INPUT_FILE -o $OUTPUT_FILE + +exit 0; diff --git a/polly/test/polybench/scripts/runall.sh b/polly/test/polybench/scripts/runall.sh new file mode 100755 index 00000000000..05678d8e582 --- /dev/null +++ b/polly/test/polybench/scripts/runall.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +if [ $# -ne 1 ]; then + echo "Usage: runall.sh <machine-acronym>"; + exit 1; +fi; + +## Default value for the compilation line. 
+if [ -z "$COMPILER_COMMAND" ]; then + COMPILER_COMMAND="gcc -O3 -fopenmp"; +fi; + +echo "Machine: $1"; +for i in `ls`; do + if [ -d "$i" ] && [ -f "$i/$i.c" ]; then + echo "Testing benchmark $i"; + rm -f data/$1-$i.dat + if [ -f "$i/compiler.opts" ]; then + read comp_opts < $i/compiler.opts; + COMPILER_F_COMMAND="$COMPILER_COMMAND $comp_opts"; + else + COMPILER_F_COMMAND="$COMPILER_COMMAND"; + fi; + for j in `find $i -name "*.c"`; do + echo "Testing $j"; + scripts/compile.sh "$COMPILER_F_COMMAND" "$j" "transfo" > /dev/null; + if [ $? -ne 0 ]; then + echo "Problem when compiling $j"; + else + val=`./transfo`; + if [ $? -ne 0 ]; then + echo "Problem when executing $j"; + else + echo "execution time: $val"; + echo "$j $val" >> data/$1-$i.dat + fi; + fi; + done; + fi; +done; diff --git a/polly/test/polybench/stencils/adi/adi.c b/polly/test/polybench/stencils/adi/adi.c new file mode 100755 index 00000000000..5a51f4a648b --- /dev/null +++ b/polly/test/polybench/stencils/adi/adi.c @@ -0,0 +1,147 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + +/* Default problem size. */ +#ifndef TSTEPS +# define TSTEPS 10 +#endif +#ifndef N +# define N 1024 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. 
*/ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE X[N][N]; +DATA_TYPE A[N][N]; +DATA_TYPE B[N][N]; +#else +DATA_TYPE** X = (DATA_TYPE**)malloc(MAXGRID * sizeof(DATA_TYPE*)); +DATA_TYPE** A = (DATA_TYPE**)malloc(MAXGRID * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(MAXGRID * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < N; ++i) + { + X[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + B[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + { + X[i][j] = ((DATA_TYPE) i*j + 1) / N; + A[i][j] = ((DATA_TYPE) i*j + 2) / N; + B[i][j] = ((DATA_TYPE) i*j + 3) / N; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, A[i][j]); + if ((i * N + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long t, i1, i2; + long tsteps = TSTEPS; +#pragma scop +#pragma live-out X + + for (t = 0; t < tsteps; t++) + { + for (i1 = 0; i1 < n; i1++) + for (i2 = 1; i2 < n; i2++) + { + X[i1][i2] = X[i1][i2] - X[i1][i2-1] * A[i1][i2] / B[i1][i2-1]; + B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / B[i1][i2-1]; + } + + for (i1 = 0; i1 < n; i1++) + X[i1][n-1] = X[i1][n-1] / B[i1][n-1]; + + for (i1 = 0; i1 < n; i1++) + for (i2 = 0; i2 < n-2; i2++) + X[i1][n-i2-2] = (X[i1][n-2-i2] - X[i1][n-2-i2-1] * A[i1][n-i2-3]) / B[i1][n-3-i2]; + + for (i1 = 1; i1 < n; i1++) + for (i2 = 0; i2 < n; i2++) { + X[i1][i2] = X[i1][i2] - X[i1-1][i2] * A[i1][i2] / B[i1-1][i2]; + B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / 
B[i1-1][i2]; + } + + for (i2 = 0; i2 < n; i2++) + X[n-1][i2] = X[n-1][i2] / B[n-1][i2]; + + for (i1 = 0; i1 < n-2; i1++) + for (i2 = 0; i2 < n; i2++) + X[n-2-i1][i2] = (X[n-2-i1][i2] - X[n-i1-3][i2] * A[n-3-i1][i2]) / B[n-2-i1][i2]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int t, i1, i2; + int n = N; + int tsteps = TSTEPS; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/stencils/adi/adi_with_param.ll b/polly/test/polybench/stencils/adi/adi_with_param.ll new file mode 100644 index 00000000000..d37a08a8e8c --- /dev/null +++ b/polly/test/polybench/stencils/adi/adi_with_param.ll @@ -0,0 +1,251 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './stencils/adi/adi_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@X = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@B = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %n) nounwind { +bb.nph81: + %0 = icmp sgt i64 %n, 0 + %1 = icmp sgt i64 
%n, 1 + %2 = add nsw i64 %n, -2 + %3 = icmp sgt i64 %2, 0 + %4 = add nsw i64 %n, -3 + %tmp = add i64 %n, -1 + br label %bb5.preheader + +bb.nph: ; preds = %bb.nph.preheader, %bb4 + %storemerge112 = phi i64 [ %16, %bb4 ], [ 0, %bb.nph.preheader ] + %scevgep86.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge112, i64 0 + %scevgep85.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge112, i64 0 + %.pre = load double* %scevgep85.phi.trans.insert, align 32 + %.pre149 = load double* %scevgep86.phi.trans.insert, align 32 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %5 = phi double [ %.pre149, %bb.nph ], [ %15, %bb2 ] + %6 = phi double [ %.pre, %bb.nph ], [ %11, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp90, %bb2 ] + %tmp42 = add i64 %indvar, 1 + %scevgep84 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge112, i64 %tmp42 + %scevgep83 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %storemerge112, i64 %tmp42 + %scevgep = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge112, i64 %tmp42 + %tmp90 = add i64 %indvar, 1 + %7 = load double* %scevgep, align 8 + %8 = load double* %scevgep83, align 8 + %9 = fmul double %6, %8 + %10 = fdiv double %9, %5 + %11 = fsub double %7, %10 + store double %11, double* %scevgep, align 8 + %12 = load double* %scevgep84, align 8 + %13 = fmul double %8, %8 + %14 = fdiv double %13, %5 + %15 = fsub double %12, %14 + store double %15, double* %scevgep84, align 8 + %exitcond37 = icmp eq i64 %tmp90, %tmp + br i1 %exitcond37, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %16 = add nsw i64 %storemerge112, 1 + %exitcond = icmp eq i64 %16, %n + br i1 %exitcond, label %bb8.loopexit.loopexit, label %bb.nph + +bb.nph16: ; preds = %bb5.preheader + br i1 %1, label %bb.nph.preheader, label %bb8.loopexit + +bb.nph.preheader: ; preds = %bb.nph16 + br label %bb.nph + +bb7: ; preds = %bb7.preheader, %bb7 + %storemerge217 = phi i64 [ %20, %bb7 
], [ 0, %bb7.preheader ] + %scevgep96 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge217, i64 %tmp + %scevgep95 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge217, i64 %tmp + %17 = load double* %scevgep95, align 8 + %18 = load double* %scevgep96, align 8 + %19 = fdiv double %17, %18 + store double %19, double* %scevgep95, align 8 + %20 = add nsw i64 %storemerge217, 1 + %exitcond18 = icmp eq i64 %20, %n + br i1 %exitcond18, label %bb14.loopexit, label %bb7 + +bb8.loopexit.loopexit: ; preds = %bb4 + br label %bb8.loopexit + +bb8.loopexit: ; preds = %bb8.loopexit.loopexit, %bb.nph16 + br i1 %0, label %bb7.preheader, label %bb20.loopexit + +bb7.preheader: ; preds = %bb8.loopexit + br label %bb7 + +bb11: ; preds = %bb12.preheader, %bb11 + %storemerge920 = phi i64 [ %28, %bb11 ], [ 0, %bb12.preheader ] + %tmp30 = mul i64 %storemerge920, -1 + %tmp31 = add i64 %4, %tmp30 + %scevgep104 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge323, i64 %tmp31 + %scevgep103 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %storemerge323, i64 %tmp31 + %scevgep102 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge323, i64 %tmp31 + %tmp35 = add i64 %2, %tmp30 + %scevgep100 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge323, i64 %tmp35 + %21 = load double* %scevgep100, align 8 + %22 = load double* %scevgep102, align 8 + %23 = load double* %scevgep103, align 8 + %24 = fmul double %22, %23 + %25 = fsub double %21, %24 + %26 = load double* %scevgep104, align 8 + %27 = fdiv double %25, %26 + store double %27, double* %scevgep100, align 8 + %28 = add nsw i64 %storemerge920, 1 + %exitcond21 = icmp eq i64 %28, %2 + br i1 %exitcond21, label %bb13, label %bb11 + +bb13: ; preds = %bb11 + %29 = add nsw i64 %storemerge323, 1 + %exitcond29 = icmp eq i64 %29, %n + br i1 %exitcond29, label %bb20.loopexit.loopexit, label %bb12.preheader + +bb14.loopexit: ; preds = %bb7 + %.not = xor i1 %0, true + 
%.not150 = xor i1 %3, true + %brmerge = or i1 %.not, %.not150 + br i1 %brmerge, label %bb20.loopexit, label %bb12.preheader.preheader + +bb12.preheader.preheader: ; preds = %bb14.loopexit + br label %bb12.preheader + +bb12.preheader: ; preds = %bb12.preheader.preheader, %bb13 + %storemerge323 = phi i64 [ %29, %bb13 ], [ 0, %bb12.preheader.preheader ] + br label %bb11 + +bb17: ; preds = %bb18.preheader, %bb17 + %storemerge828 = phi i64 [ %41, %bb17 ], [ 0, %bb18.preheader ] + %scevgep119 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %indvar114, i64 %storemerge828 + %scevgep118 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %indvar114, i64 %storemerge828 + %scevgep121 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp11, i64 %storemerge828 + %scevgep120 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp11, i64 %storemerge828 + %scevgep117 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp11, i64 %storemerge828 + %30 = load double* %scevgep117, align 8 + %31 = load double* %scevgep118, align 8 + %32 = load double* %scevgep120, align 8 + %33 = fmul double %31, %32 + %34 = load double* %scevgep119, align 8 + %35 = fdiv double %33, %34 + %36 = fsub double %30, %35 + store double %36, double* %scevgep117, align 8 + %37 = load double* %scevgep121, align 8 + %38 = fmul double %32, %32 + %39 = fdiv double %38, %34 + %40 = fsub double %37, %39 + store double %40, double* %scevgep121, align 8 + %41 = add nsw i64 %storemerge828, 1 + %exitcond1 = icmp eq i64 %41, %n + br i1 %exitcond1, label %bb19, label %bb17 + +bb19: ; preds = %bb17 + %tmp125 = add i64 %indvar114, 1 + %exitcond8 = icmp eq i64 %tmp125, %tmp + br i1 %exitcond8, label %bb23.loopexit.loopexit, label %bb18.preheader + +bb20.loopexit.loopexit: ; preds = %bb13 + br label %bb20.loopexit + +bb20.loopexit: ; preds = %bb20.loopexit.loopexit, %bb5.preheader, %bb14.loopexit, %bb8.loopexit + br i1 %1, label %bb.nph34, label %bb23.loopexit + +bb.nph34: ; preds = 
%bb20.loopexit + br i1 %0, label %bb18.preheader.preheader, label %bb29.loopexit + +bb18.preheader.preheader: ; preds = %bb.nph34 + br label %bb18.preheader + +bb18.preheader: ; preds = %bb18.preheader.preheader, %bb19 + %indvar114 = phi i64 [ %tmp125, %bb19 ], [ 0, %bb18.preheader.preheader ] + %tmp11 = add i64 %indvar114, 1 + br label %bb17 + +bb22: ; preds = %bb22.preheader, %bb22 + %storemerge535 = phi i64 [ %45, %bb22 ], [ 0, %bb22.preheader ] + %scevgep131 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp, i64 %storemerge535 + %scevgep130 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp, i64 %storemerge535 + %42 = load double* %scevgep130, align 8 + %43 = load double* %scevgep131, align 8 + %44 = fdiv double %42, %43 + store double %44, double* %scevgep130, align 8 + %45 = add nsw i64 %storemerge535, 1 + %exitcond15 = icmp eq i64 %45, %n + br i1 %exitcond15, label %bb29.loopexit.loopexit, label %bb22 + +bb23.loopexit.loopexit: ; preds = %bb19 + br label %bb23.loopexit + +bb23.loopexit: ; preds = %bb23.loopexit.loopexit, %bb20.loopexit + br i1 %0, label %bb22.preheader, label %bb29.loopexit + +bb22.preheader: ; preds = %bb23.loopexit + br label %bb22 + +bb26: ; preds = %bb27.preheader, %bb26 + %storemerge737 = phi i64 [ %53, %bb26 ], [ 0, %bb27.preheader ] + %scevgep138 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp58, i64 %storemerge737 + %scevgep137 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp58, i64 %storemerge737 + %scevgep139 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp61, i64 %storemerge737 + %scevgep135 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp61, i64 %storemerge737 + %46 = load double* %scevgep135, align 8 + %47 = load double* %scevgep137, align 8 + %48 = load double* %scevgep138, align 8 + %49 = fmul double %47, %48 + %50 = fsub double %46, %49 + %51 = load double* %scevgep139, align 8 + %52 = fdiv double %50, %51 + store double %52, double* %scevgep135, 
align 8 + %53 = add nsw i64 %storemerge737, 1 + %exitcond48 = icmp eq i64 %53, %n + br i1 %exitcond48, label %bb28, label %bb26 + +bb28: ; preds = %bb26 + %54 = add nsw i64 %storemerge640, 1 + %exitcond56 = icmp eq i64 %54, %2 + br i1 %exitcond56, label %bb30.loopexit, label %bb27.preheader + +bb29.loopexit.loopexit: ; preds = %bb22 + br label %bb29.loopexit + +bb29.loopexit: ; preds = %bb29.loopexit.loopexit, %bb23.loopexit, %bb.nph34 + %.not151 = xor i1 %3, true + %.not152 = xor i1 %0, true + %brmerge153 = or i1 %.not151, %.not152 + br i1 %brmerge153, label %bb30, label %bb27.preheader.preheader + +bb27.preheader.preheader: ; preds = %bb29.loopexit + br label %bb27.preheader + +bb27.preheader: ; preds = %bb27.preheader.preheader, %bb28 + %storemerge640 = phi i64 [ %54, %bb28 ], [ 0, %bb27.preheader.preheader ] + %tmp57 = mul i64 %storemerge640, -1 + %tmp58 = add i64 %4, %tmp57 + %tmp61 = add i64 %2, %tmp57 + br label %bb26 + +bb30.loopexit: ; preds = %bb28 + br label %bb30 + +bb30: ; preds = %bb30.loopexit, %bb29.loopexit + %55 = add nsw i64 %storemerge46, 1 + %exitcond64 = icmp eq i64 %55, 10 + br i1 %exitcond64, label %return, label %bb5.preheader + +bb5.preheader: ; preds = %bb30, %bb.nph81 + %storemerge46 = phi i64 [ 0, %bb.nph81 ], [ %55, %bb30 ] + br i1 %0, label %bb.nph16, label %bb20.loopexit + +return: ; preds = %bb30 + ret void +} diff --git a/polly/test/polybench/stencils/adi/adi_without_param.ll b/polly/test/polybench/stencils/adi/adi_without_param.ll new file mode 100644 index 00000000000..a6dd016800e --- /dev/null +++ b/polly/test/polybench/stencils/adi/adi_without_param.ll @@ -0,0 +1,200 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './stencils/adi/adi_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type 
{ i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@X = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@B = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph79: + br label %bb5.preheader + +bb.nph: ; preds = %bb5.preheader, %bb4 + %storemerge112 = phi i64 [ %11, %bb4 ], [ 0, %bb5.preheader ] + %scevgep83.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge112, i64 0 + %scevgep82.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge112, i64 0 + %.pre = load double* %scevgep82.phi.trans.insert, align 32 + %.pre143 = load double* %scevgep83.phi.trans.insert, align 32 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %0 = phi double [ %.pre143, %bb.nph ], [ %10, %bb2 ] + %1 = phi double [ %.pre, %bb.nph ], [ %6, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp87, %bb2 ] + %tmp5 = add i64 %indvar, 1 + %scevgep81 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge112, i64 %tmp5 + %scevgep80 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %storemerge112, i64 %tmp5 + %scevgep = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge112, i64 %tmp5 + %tmp87 = add i64 %indvar, 1 + %2 = load double* %scevgep, align 8 + %3 = load double* %scevgep80, align 8 + %4 = fmul double %1, %3 + %5 = fdiv double %4, %0 + %6 = fsub double %2, %5 + store double %6, double* %scevgep, align 8 + %7 = load double* %scevgep81, align 8 + %8 = fmul double %3, %3 + %9 = fdiv double %8, %0 + %10 = fsub double %7, %9 + store double %10, 
double* %scevgep81, align 8 + %exitcond1 = icmp eq i64 %tmp87, 1023 + br i1 %exitcond1, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %11 = add nsw i64 %storemerge112, 1 + %exitcond = icmp eq i64 %11, 1024 + br i1 %exitcond, label %bb7.loopexit, label %bb.nph + +bb7.loopexit: ; preds = %bb4 + br label %bb7 + +bb7: ; preds = %bb7.loopexit, %bb7 + %storemerge217 = phi i64 [ %15, %bb7 ], [ 0, %bb7.loopexit ] + %scevgep93 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge217, i64 1023 + %scevgep92 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge217, i64 1023 + %12 = load double* %scevgep92, align 8 + %13 = load double* %scevgep93, align 8 + %14 = fdiv double %12, %13 + store double %14, double* %scevgep92, align 8 + %15 = add nsw i64 %storemerge217, 1 + %exitcond11 = icmp eq i64 %15, 1024 + br i1 %exitcond11, label %bb12.preheader.loopexit, label %bb7 + +bb11: ; preds = %bb12.preheader, %bb11 + %storemerge920 = phi i64 [ %23, %bb11 ], [ 0, %bb12.preheader ] + %tmp22 = mul i64 %storemerge920, -1 + %tmp23 = add i64 %tmp22, 1021 + %scevgep100 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %storemerge323, i64 %tmp23 + %scevgep99 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %storemerge323, i64 %tmp23 + %scevgep98 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge323, i64 %tmp23 + %tmp27 = add i64 %tmp22, 1022 + %scevgep96 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %storemerge323, i64 %tmp27 + %16 = load double* %scevgep96, align 8 + %17 = load double* %scevgep98, align 8 + %18 = load double* %scevgep99, align 8 + %19 = fmul double %17, %18 + %20 = fsub double %16, %19 + %21 = load double* %scevgep100, align 8 + %22 = fdiv double %20, %21 + store double %22, double* %scevgep96, align 8 + %23 = add nsw i64 %storemerge920, 1 + %exitcond14 = icmp eq i64 %23, 1022 + br i1 %exitcond14, label %bb13, label %bb11 + +bb13: ; preds = %bb11 + %24 = add nsw i64 %storemerge323, 1 + %exitcond21 = 
icmp eq i64 %24, 1024 + br i1 %exitcond21, label %bb18.preheader.loopexit, label %bb12.preheader + +bb12.preheader.loopexit: ; preds = %bb7 + br label %bb12.preheader + +bb12.preheader: ; preds = %bb12.preheader.loopexit, %bb13 + %storemerge323 = phi i64 [ %24, %bb13 ], [ 0, %bb12.preheader.loopexit ] + br label %bb11 + +bb17: ; preds = %bb18.preheader, %bb17 + %storemerge828 = phi i64 [ %36, %bb17 ], [ 0, %bb18.preheader ] + %scevgep114 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %indvar110, i64 %storemerge828 + %scevgep113 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %indvar110, i64 %storemerge828 + %scevgep116 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp38, i64 %storemerge828 + %scevgep115 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp38, i64 %storemerge828 + %scevgep112 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp38, i64 %storemerge828 + %25 = load double* %scevgep112, align 8 + %26 = load double* %scevgep113, align 8 + %27 = load double* %scevgep115, align 8 + %28 = fmul double %26, %27 + %29 = load double* %scevgep114, align 8 + %30 = fdiv double %28, %29 + %31 = fsub double %25, %30 + store double %31, double* %scevgep112, align 8 + %32 = load double* %scevgep116, align 8 + %33 = fmul double %27, %27 + %34 = fdiv double %33, %29 + %35 = fsub double %32, %34 + store double %35, double* %scevgep116, align 8 + %36 = add nsw i64 %storemerge828, 1 + %exitcond29 = icmp eq i64 %36, 1024 + br i1 %exitcond29, label %bb19, label %bb17 + +bb19: ; preds = %bb17 + %tmp120 = add i64 %indvar110, 1 + %exitcond35 = icmp eq i64 %tmp120, 1023 + br i1 %exitcond35, label %bb22.loopexit, label %bb18.preheader + +bb18.preheader.loopexit: ; preds = %bb13 + br label %bb18.preheader + +bb18.preheader: ; preds = %bb18.preheader.loopexit, %bb19 + %indvar110 = phi i64 [ %tmp120, %bb19 ], [ 0, %bb18.preheader.loopexit ] + %tmp38 = add i64 %indvar110, 1 + br label %bb17 + +bb22.loopexit: ; preds = %bb19 + br label 
%bb22 + +bb22: ; preds = %bb22.loopexit, %bb22 + %storemerge535 = phi i64 [ %40, %bb22 ], [ 0, %bb22.loopexit ] + %scevgep126 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 1023, i64 %storemerge535 + %scevgep125 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 1023, i64 %storemerge535 + %37 = load double* %scevgep125, align 8 + %38 = load double* %scevgep126, align 8 + %39 = fdiv double %37, %38 + store double %39, double* %scevgep125, align 8 + %40 = add nsw i64 %storemerge535, 1 + %exitcond42 = icmp eq i64 %40, 1024 + br i1 %exitcond42, label %bb27.preheader.loopexit, label %bb22 + +bb26: ; preds = %bb27.preheader, %bb26 + %storemerge737 = phi i64 [ %48, %bb26 ], [ 0, %bb27.preheader ] + %scevgep132 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp54, i64 %storemerge737 + %scevgep131 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp54, i64 %storemerge737 + %scevgep133 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp57, i64 %storemerge737 + %scevgep129 = getelementptr [1024 x [1024 x double]]* @X, i64 0, i64 %tmp57, i64 %storemerge737 + %41 = load double* %scevgep129, align 8 + %42 = load double* %scevgep131, align 8 + %43 = load double* %scevgep132, align 8 + %44 = fmul double %42, %43 + %45 = fsub double %41, %44 + %46 = load double* %scevgep133, align 8 + %47 = fdiv double %45, %46 + store double %47, double* %scevgep129, align 8 + %48 = add nsw i64 %storemerge737, 1 + %exitcond45 = icmp eq i64 %48, 1024 + br i1 %exitcond45, label %bb28, label %bb26 + +bb28: ; preds = %bb26 + %49 = add nsw i64 %storemerge639, 1 + %exitcond52 = icmp eq i64 %49, 1022 + br i1 %exitcond52, label %bb30, label %bb27.preheader + +bb27.preheader.loopexit: ; preds = %bb22 + br label %bb27.preheader + +bb27.preheader: ; preds = %bb27.preheader.loopexit, %bb28 + %storemerge639 = phi i64 [ %49, %bb28 ], [ 0, %bb27.preheader.loopexit ] + %tmp53 = mul i64 %storemerge639, -1 + %tmp54 = add i64 %tmp53, 1021 + %tmp57 = add i64 %tmp53, 1022 
+ br label %bb26 + +bb30: ; preds = %bb28 + %50 = add nsw i64 %storemerge44, 1 + %exitcond60 = icmp eq i64 %50, 10 + br i1 %exitcond60, label %return, label %bb5.preheader + +bb5.preheader: ; preds = %bb30, %bb.nph79 + %storemerge44 = phi i64 [ 0, %bb.nph79 ], [ %50, %bb30 ] + br label %bb.nph + +return: ; preds = %bb30 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper.c b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper.c new file mode 100755 index 00000000000..791052d25f0 --- /dev/null +++ b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper.c @@ -0,0 +1,123 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + +/* Default problem size. */ +#ifndef TSTEPS +# define TSTEPS 20 +#endif +#ifndef N +# define N 1024 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[N][N]; +DATA_TYPE B[N][N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +DATA_TYPE** B = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < N; ++i) + { + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + B[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); + } +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + { + A[i][j] = ((DATA_TYPE) i*j + 10) / N; + B[i][j] = ((DATA_TYPE) i*j + 11) / N; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, A[i][j]); + if ((i * N + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long t, i, j; + long tsteps = TSTEPS; + +#pragma scop +#pragma live-out A + + for (t = 0; t < tsteps; t++) + { + for (i = 2; i < n - 1; i++) + for (j = 2; j < n - 1; j++) + B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]); + for (i = 2; i < n-1; i++) + for (j = 2; j < n-1; j++) + A[i][j] = B[i][j]; + } + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int t, i, j; + int tsteps = TSTEPS; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_with_param.ll b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_with_param.ll new file mode 100644 index 00000000000..58fa7a1ab81 --- /dev/null +++ b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_with_param.ll @@ -0,0 +1,109 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; region-simplify causes: Non canonical PHI node found +; XFAIL:* + +; ModuleID = './stencils/jacobi-2d-imper/jacobi-2d-imper_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, 
i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@B = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %n) nounwind { +bb.nph35: + %0 = add nsw i64 %n, -1 + %1 = icmp sgt i64 %0, 2 + %tmp = add i64 %n, -3 + br label %bb5.preheader + +bb.nph: ; preds = %bb.nph.preheader, %bb4 + %indvar36 = phi i64 [ %tmp50, %bb4 ], [ 0, %bb.nph.preheader ] + %tmp13 = add i64 %indvar36, 1 + %tmp16 = add i64 %indvar36, 3 + %tmp18 = add i64 %indvar36, 2 + %scevgep40.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp18, i64 2 + %.pre = load double* %scevgep40.phi.trans.insert, align 16 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %2 = phi double [ %.pre, %bb.nph ], [ %5, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp58, %bb2 ] + %tmp14 = add i64 %indvar, 2 + %scevgep44 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp13, i64 %tmp14 + %scevgep42 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp16, i64 %tmp14 + %scevgep = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp18, i64 %tmp14 + %tmp20 = add i64 %indvar, 3 + %scevgep48 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp18, i64 %tmp20 + %tmp22 = add i64 %indvar, 1 + %scevgep46 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp18, i64 %tmp22 + %tmp58 = add i64 %indvar, 1 + %3 = load double* %scevgep46, align 8 + %4 = fadd double %2, %3 + %5 = load double* %scevgep48, align 8 + %6 = fadd double %4, %5 + %7 = load double* %scevgep42, align 8 + %8 = fadd double %6, %7 + %9 = load double* %scevgep44, align 8 + %10 = fadd double %8, %9 + %11 = fmul double %10, 2.000000e-01 + store double %11, double* %scevgep, align 8 + %exitcond1 = icmp eq i64 %tmp58, %tmp 
+ br i1 %exitcond1, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %tmp50 = add i64 %indvar36, 1 + %exitcond = icmp eq i64 %tmp50, %tmp + br i1 %exitcond, label %bb11.loopexit, label %bb.nph + +bb8: ; preds = %bb9.preheader, %bb8 + %indvar62 = phi i64 [ %indvar.next63, %bb8 ], [ 0, %bb9.preheader ] + %tmp32 = add i64 %indvar62, 2 + %scevgep70 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp31, i64 %tmp32 + %scevgep69 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp31, i64 %tmp32 + %12 = load double* %scevgep70, align 8 + store double %12, double* %scevgep69, align 8 + %indvar.next63 = add i64 %indvar62, 1 + %exitcond25 = icmp eq i64 %indvar.next63, %tmp + br i1 %exitcond25, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %indvar.next66 = add i64 %indvar65, 1 + %exitcond30 = icmp eq i64 %indvar.next66, %tmp + br i1 %exitcond30, label %bb12.loopexit, label %bb9.preheader + +bb11.loopexit: ; preds = %bb4 + br i1 %1, label %bb9.preheader.preheader, label %bb12 + +bb9.preheader.preheader: ; preds = %bb11.loopexit + br label %bb9.preheader + +bb9.preheader: ; preds = %bb9.preheader.preheader, %bb10 + %indvar65 = phi i64 [ %indvar.next66, %bb10 ], [ 0, %bb9.preheader.preheader ] + %tmp31 = add i64 %indvar65, 2 + br label %bb8 + +bb12.loopexit: ; preds = %bb10 + br label %bb12 + +bb12: ; preds = %bb12.loopexit, %bb5.preheader, %bb11.loopexit + %13 = add nsw i64 %storemerge20, 1 + %exitcond35 = icmp eq i64 %13, 20 + br i1 %exitcond35, label %return, label %bb5.preheader + +bb5.preheader: ; preds = %bb12, %bb.nph35 + %storemerge20 = phi i64 [ 0, %bb.nph35 ], [ %13, %bb12 ] + br i1 %1, label %bb.nph.preheader, label %bb12 + +bb.nph.preheader: ; preds = %bb5.preheader + br label %bb.nph + +return: ; preds = %bb12 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return + diff --git a/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_without_param.ll 
b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_without_param.ll new file mode 100644 index 00000000000..6d4f35992cc --- /dev/null +++ b/polly/test/polybench/stencils/jacobi-2d-imper/jacobi-2d-imper_without_param.ll @@ -0,0 +1,93 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './stencils/jacobi-2d-imper/jacobi-2d-imper_without_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@B = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph35: + br label %bb5.preheader + +bb.nph: ; preds = %bb5.preheader, %bb4 + %indvar36 = phi i64 [ %tmp49, %bb4 ], [ 0, %bb5.preheader ] + %tmp12 = add i64 %indvar36, 1 + %tmp15 = add i64 %indvar36, 3 + %tmp17 = add i64 %indvar36, 2 + %scevgep39.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp17, i64 2 + %.pre = load double* %scevgep39.phi.trans.insert, align 16 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %0 = phi double [ %.pre, %bb.nph ], [ %3, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp57, %bb2 ] + %tmp13 = add i64 %indvar, 2 + %scevgep43 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp12, i64 %tmp13 + %scevgep41 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp15, i64 %tmp13 + %scevgep = getelementptr [1024 x [1024 x double]]* 
@B, i64 0, i64 %tmp17, i64 %tmp13 + %tmp19 = add i64 %indvar, 3 + %scevgep47 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp17, i64 %tmp19 + %tmp21 = add i64 %indvar, 1 + %scevgep45 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp17, i64 %tmp21 + %tmp57 = add i64 %indvar, 1 + %1 = load double* %scevgep45, align 8 + %2 = fadd double %0, %1 + %3 = load double* %scevgep47, align 8 + %4 = fadd double %2, %3 + %5 = load double* %scevgep41, align 8 + %6 = fadd double %4, %5 + %7 = load double* %scevgep43, align 8 + %8 = fadd double %6, %7 + %9 = fmul double %8, 2.000000e-01 + store double %9, double* %scevgep, align 8 + %exitcond1 = icmp eq i64 %tmp57, 1021 + br i1 %exitcond1, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %tmp49 = add i64 %indvar36, 1 + %exitcond = icmp eq i64 %tmp49, 1021 + br i1 %exitcond, label %bb9.preheader.loopexit, label %bb.nph + +bb8: ; preds = %bb9.preheader, %bb8 + %indvar61 = phi i64 [ %indvar.next62, %bb8 ], [ 0, %bb9.preheader ] + %tmp30 = add i64 %indvar61, 2 + %scevgep68 = getelementptr [1024 x [1024 x double]]* @B, i64 0, i64 %tmp29, i64 %tmp30 + %scevgep67 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp29, i64 %tmp30 + %10 = load double* %scevgep68, align 8 + store double %10, double* %scevgep67, align 8 + %indvar.next62 = add i64 %indvar61, 1 + %exitcond24 = icmp eq i64 %indvar.next62, 1021 + br i1 %exitcond24, label %bb10, label %bb8 + +bb10: ; preds = %bb8 + %indvar.next65 = add i64 %indvar64, 1 + %exitcond28 = icmp eq i64 %indvar.next65, 1021 + br i1 %exitcond28, label %bb12, label %bb9.preheader + +bb9.preheader.loopexit: ; preds = %bb4 + br label %bb9.preheader + +bb9.preheader: ; preds = %bb9.preheader.loopexit, %bb10 + %indvar64 = phi i64 [ %indvar.next65, %bb10 ], [ 0, %bb9.preheader.loopexit ] + %tmp29 = add i64 %indvar64, 2 + br label %bb8 + +bb12: ; preds = %bb10 + %11 = add nsw i64 %storemerge20, 1 + %exitcond33 = icmp eq i64 %11, 20 + br i1 %exitcond33, label %return, label 
%bb5.preheader + +bb5.preheader: ; preds = %bb12, %bb.nph35 + %storemerge20 = phi i64 [ 0, %bb.nph35 ], [ %11, %bb12 ] + br label %bb.nph + +return: ; preds = %bb12 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/stencils/seidel/seidel.c b/polly/test/polybench/stencils/seidel/seidel.c new file mode 100755 index 00000000000..f86d83cb61a --- /dev/null +++ b/polly/test/polybench/stencils/seidel/seidel.c @@ -0,0 +1,112 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + +/* Default problem size. */ +#ifndef TSTEPS +# define TSTEPS 20 +#endif +#ifndef N +# define N 1024 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif +#ifndef DATA_PRINTF_MODIFIER +# define DATA_PRINTF_MODIFIER "%0.2lf " +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[N][N]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(N * sizeof(DATA_TYPE*)); +{ + int i; + for (i = 0; i < N; ++i) + A[i] = (DATA_TYPE*)malloc(N * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + A[i][j] = ((DATA_TYPE) i*j + 10) / N; +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! 
strcmp(argv[0], "")) +#endif + { + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) { + fprintf(stderr, DATA_PRINTF_MODIFIER, A[i][j]); + if ((i * N + j) % 80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + +#ifndef SCOP_PARAM +void scop_func() { + long n = N; +#else +void scop_func(long n) { +#endif + long t, i, j; + long tsteps = TSTEPS; + +#pragma scop +#pragma live-out A + + for (t = 0; t <= tsteps - 1; t++) + for (i = 1; i<= n - 2; i++) + for (j = 1; j <= n - 2; j++) + A[i][j] = (A[i-1][j-1] + A[i-1][j] + A[i-1][j+1] + + A[i][j-1] + A[i][j] + A[i][j+1] + + A[i+1][j-1] + A[i+1][j] + A[i+1][j+1])/9.0; + +#pragma endscop +} + +int main(int argc, char** argv) +{ + int t, i, j; + int tsteps = TSTEPS; + int n = N; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#ifndef SCOP_PARAM + scop_func(); +#else + scop_func(n); +#endif + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} diff --git a/polly/test/polybench/stencils/seidel/seidel_with_param.ll b/polly/test/polybench/stencils/seidel/seidel_with_param.ll new file mode 100644 index 00000000000..4c45833855a --- /dev/null +++ b/polly/test/polybench/stencils/seidel/seidel_with_param.ll @@ -0,0 +1,96 @@ +; RUN: opt %loadPolly %defaultOpts -polly-analyze-ir -print-top-scop-only -analyze %s | FileCheck %s +; XFAIL: * +; ModuleID = './stencils/seidel/seidel_with_param.ll' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A 
= common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func(i64 %n) nounwind { +bb.nph20: + %0 = add nsw i64 %n, -2 + %1 = icmp slt i64 %0, 1 + br i1 %1, label %return, label %bb.nph8.preheader + +bb.nph8.preheader: ; preds = %bb.nph20 + br label %bb.nph8 + +bb.nph: ; preds = %bb.nph.preheader, %bb4 + %indvar21 = phi i64 [ %tmp39, %bb4 ], [ 0, %bb.nph.preheader ] + %tmp5 = add i64 %indvar21, 1 + %tmp43 = add i64 %indvar21, 2 + %scevgep26.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %indvar21, i64 1 + %scevgep.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp43, i64 1 + %scevgep30.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp5, i64 0 + %scevgep25.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp5, i64 1 + %tmp39 = add i64 %indvar21, 1 + %.pre = load double* %scevgep26.phi.trans.insert, align 8 + %.pre47 = load double* %scevgep25.phi.trans.insert, align 8 + %.pre48 = load double* %scevgep.phi.trans.insert, align 8 + %.pre49 = load double* %scevgep30.phi.trans.insert, align 32 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %2 = phi double [ %.pre49, %bb.nph ], [ %19, %bb2 ] + %3 = phi double [ %.pre48, %bb.nph ], [ %17, %bb2 ] + %4 = phi double [ %.pre47, %bb.nph ], [ %12, %bb2 ] + %5 = phi double [ %.pre, %bb.nph ], [ %8, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp37, %bb2 ] + %tmp4 = add i64 %indvar, 2 + %scevgep29 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %indvar21, i64 %tmp4 + %scevgep27 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %indvar21, i64 %indvar + %scevgep31 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp5, i64 %tmp4 + %tmp6 = add i64 %indvar, 1 + %scevgep25 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp5, i64 %tmp6 + %scevgep33 = 
getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp43, i64 %tmp4 + %scevgep32 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp43, i64 %indvar + %tmp34 = add i64 %indvar, 2 + %tmp37 = add i64 %indvar, 1 + %6 = load double* %scevgep27, align 8 + %7 = fadd double %6, %5 + %8 = load double* %scevgep29, align 8 + %9 = fadd double %7, %8 + %10 = fadd double %9, %2 + %11 = fadd double %10, %4 + %12 = load double* %scevgep31, align 8 + %13 = fadd double %11, %12 + %14 = load double* %scevgep32, align 8 + %15 = fadd double %13, %14 + %16 = fadd double %15, %3 + %17 = load double* %scevgep33, align 8 + %18 = fadd double %16, %17 + %19 = fdiv double %18, 9.000000e+00 + store double %19, double* %scevgep25, align 8 + %20 = icmp slt i64 %0, %tmp34 + br i1 %20, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %21 = icmp slt i64 %0, %tmp43 + br i1 %21, label %bb6.loopexit, label %bb.nph + +bb.nph8: ; preds = %bb.nph8.preheader, %bb6 + %storemerge9 = phi i64 [ %22, %bb6 ], [ 0, %bb.nph8.preheader ] + br i1 %1, label %bb6, label %bb.nph.preheader + +bb.nph.preheader: ; preds = %bb.nph8 + br label %bb.nph + +bb6.loopexit: ; preds = %bb4 + br label %bb6 + +bb6: ; preds = %bb6.loopexit, %bb.nph8 + %22 = add nsw i64 %storemerge9, 1 + %exitcond8 = icmp eq i64 %22, 20 + br i1 %exitcond8, label %return.loopexit, label %bb.nph8 + +return.loopexit: ; preds = %bb6 + br label %return + +return: ; preds = %return.loopexit, %bb.nph20 + ret void +} diff --git a/polly/test/polybench/stencils/seidel/seidel_without_param.ll b/polly/test/polybench/stencils/seidel/seidel_without_param.ll new file mode 100644 index 00000000000..1a13b3391bc --- /dev/null +++ b/polly/test/polybench/stencils/seidel/seidel_without_param.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly %defaultOpts -polly-detect -analyze %s | FileCheck %s +; ModuleID = './stencils/seidel/seidel_without_param.ll' +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@A = common global [1024 x [1024 x double]] zeroinitializer, align 32 +@stderr = external global %struct._IO_FILE* +@.str = private constant [8 x i8] c"%0.2lf \00", align 1 + +define void @scop_func() nounwind { +bb.nph20.bb.nph20.split_crit_edge: + br label %bb5.preheader + +bb.nph: ; preds = %bb5.preheader, %bb4 + %indvar21 = phi i64 [ %tmp40, %bb4 ], [ 0, %bb5.preheader ] + %tmp6 = add i64 %indvar21, 1 + %tmp8 = add i64 %indvar21, 2 + %scevgep26.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %indvar21, i64 1 + %scevgep.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp8, i64 1 + %scevgep30.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp6, i64 0 + %scevgep25.phi.trans.insert = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp6, i64 1 + %tmp40 = add i64 %indvar21, 1 + %.pre = load double* %scevgep26.phi.trans.insert, align 8 + %.pre49 = load double* %scevgep25.phi.trans.insert, align 8 + %.pre50 = load double* %scevgep.phi.trans.insert, align 8 + %.pre51 = load double* %scevgep30.phi.trans.insert, align 32 + br label %bb2 + +bb2: ; preds = %bb2, %bb.nph + %0 = phi double [ %.pre51, %bb.nph ], [ %17, %bb2 ] + %1 = phi double [ %.pre50, %bb.nph ], [ %15, %bb2 ] + %2 = phi double [ %.pre49, %bb.nph ], [ %10, %bb2 ] + %3 = phi double [ %.pre, %bb.nph ], [ %6, %bb2 ] + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp38, %bb2 ] + %tmp5 = add i64 %indvar, 2 + %scevgep29 = getelementptr [1024 x [1024 x double]]* 
@A, i64 0, i64 %indvar21, i64 %tmp5 + %scevgep27 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %indvar21, i64 %indvar + %scevgep31 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp6, i64 %tmp5 + %tmp7 = add i64 %indvar, 1 + %scevgep25 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp6, i64 %tmp7 + %scevgep33 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp8, i64 %tmp5 + %scevgep32 = getelementptr [1024 x [1024 x double]]* @A, i64 0, i64 %tmp8, i64 %indvar + %tmp38 = add i64 %indvar, 1 + %4 = load double* %scevgep27, align 8 + %5 = fadd double %4, %3 + %6 = load double* %scevgep29, align 8 + %7 = fadd double %5, %6 + %8 = fadd double %7, %0 + %9 = fadd double %8, %2 + %10 = load double* %scevgep31, align 8 + %11 = fadd double %9, %10 + %12 = load double* %scevgep32, align 8 + %13 = fadd double %11, %12 + %14 = fadd double %13, %1 + %15 = load double* %scevgep33, align 8 + %16 = fadd double %14, %15 + %17 = fdiv double %16, 9.000000e+00 + store double %17, double* %scevgep25, align 8 + %exitcond1 = icmp eq i64 %tmp38, 1022 + br i1 %exitcond1, label %bb4, label %bb2 + +bb4: ; preds = %bb2 + %exitcond = icmp eq i64 %tmp40, 1022 + br i1 %exitcond, label %bb6, label %bb.nph + +bb6: ; preds = %bb4 + %18 = add nsw i64 %storemerge9, 1 + %exitcond9 = icmp eq i64 %18, 20 + br i1 %exitcond9, label %return, label %bb5.preheader + +bb5.preheader: ; preds = %bb6, %bb.nph20.bb.nph20.split_crit_edge + %storemerge9 = phi i64 [ 0, %bb.nph20.bb.nph20.split_crit_edge ], [ %18, %bb6 ] + br label %bb.nph + +return: ; preds = %bb6 + ret void +} +; CHECK: Valid Region for Scop: bb5.preheader => return diff --git a/polly/test/polybench/utilities/instrument.c b/polly/test/polybench/utilities/instrument.c new file mode 100755 index 00000000000..8b2b64d51ad --- /dev/null +++ b/polly/test/polybench/utilities/instrument.c @@ -0,0 +1,87 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <time.h> +#include 
<sys/time.h> +#include <sys/resource.h> +#include <sched.h> +#include <math.h> + +#ifndef POLYBENCH_CACHE_SIZE_KB +# define POLYBENCH_CACHE_SIZE_KB 8192 +#endif + +/* Timer code (gettimeofday). */ +double polybench_t_start, polybench_t_end; + +static +inline +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + +inline +void polybench_flush_cache() +{ + int cs = POLYBENCH_CACHE_SIZE_KB * 1024 / sizeof(double); + double* flush = (double*) calloc(cs, sizeof(double)); + int i; + double tmp = 0.0; + for (i = 0; i < cs; i++) + tmp += flush[i]; + /* This prevents DCE on the cache flush code. */ + assert (tmp <= 10.0); +} + +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER +inline +void polybench_linux_fifo_scheduler() +{ + /* Use FIFO scheduler to limit OS interference. Program must be run + as root, and this works only for Linux kernels. */ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max(SCHED_FIFO); + sched_setscheduler(0, SCHED_FIFO, &schedParam); +} + +inline +void polybench_linux_standard_scheduler() +{ + /* Restore to standard scheduler policy. 
*/ + struct sched_param schedParam; + schedParam.sched_priority = sched_get_priority_max(SCHED_OTHER); + sched_setscheduler(0, SCHED_OTHER, &schedParam); +} +#endif + +void polybench_timer_start() +{ +#ifndef POLYBENCH_NO_FLUSH_CACHE + polybench_flush_cache(); +#endif +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_fifo_scheduler(); +#endif + polybench_t_start = rtclock(); +} + +void polybench_timer_stop() +{ +#ifdef POLYBENCH_LINUX_FIFO_SCHEDULER + polybench_linux_standard_scheduler(); +#endif + polybench_t_end = rtclock(); +} + +void polybench_timer_print() +{ + printf("%0.6lfs\n", polybench_t_end - polybench_t_start); +} diff --git a/polly/test/polybench/utilities/instrument.h b/polly/test/polybench/utilities/instrument.h new file mode 100755 index 00000000000..ef3c1c1faa1 --- /dev/null +++ b/polly/test/polybench/utilities/instrument.h @@ -0,0 +1,25 @@ +#include <stdio.h> +#include <unistd.h> +#include <sys/time.h> +#include <unistd.h> +#include <sys/time.h> +#include <math.h> + + +#define polybench_start_instruments +#define polybench_stop_instruments +#define polybench_print_instruments + +#ifdef POLYBENCH_TIME +# undef polybench_start_instruments +# undef polybench_stop_instruments +# undef polybench_print_instruments +# define polybench_start_instruments polybench_timer_start(); +# define polybench_stop_instruments polybench_timer_stop(); +# define polybench_print_instruments polybench_timer_print(); +#endif + + +extern void polybench_timer_start(); +extern void polybench_timer_stop(); +extern void polybench_timer_print(); diff --git a/polly/test/polybench/utilities/template-for-new-benchmark.c b/polly/test/polybench/utilities/template-for-new-benchmark.c new file mode 100755 index 00000000000..3c891ea9072 --- /dev/null +++ b/polly/test/polybench/utilities/template-for-new-benchmark.c @@ -0,0 +1,99 @@ +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <math.h> + +#include "instrument.h" + + +/* Default problem size. 
*/ +#ifndef NX +# define NX 8000 +#endif +#ifndef NY +# define NY 8000 +#endif + +/* Default data type is double. */ +#ifndef DATA_TYPE +# define DATA_TYPE double +#endif + +/* Array declaration. Enable malloc if POLYBENCH_TEST_MALLOC. */ +#ifndef POLYBENCH_TEST_MALLOC +DATA_TYPE A[nx][ny]; +DATA_TYPE x[ny]; +DATA_TYPE y[ny]; +DATA_TYPE tmp[nx]; +#else +DATA_TYPE** A = (DATA_TYPE**)malloc(nx * sizeof(DATA_TYPE*)); +DATA_TYPE* x = (DATA_TYPE*)malloc(ny * sizeof(DATA_TYPE)); +DATA_TYPE* y = (DATA_TYPE*)malloc(ny * sizeof(DATA_TYPE)); +DATA_TYPE* tmp = (DATA_TYPE*)malloc(nx * sizeof(DATA_TYPE)); +{ + int i; + for (i = 0; i < nx; ++i) + A[i] = (DATA_TYPE*)malloc(ny * sizeof(DATA_TYPE)); +} +#endif + +inline +void init_array() +{ + int i, j; + + for (i = 0; i < nx; i++) + { + x[i] = i * M_PI; + for (j = 0; j < ny; j++) + A[i][j] = ((DATA_TYPE) i*j) / nx; + } +} + +/* Define the live-out variables. Code is not executed unless + POLYBENCH_DUMP_ARRAYS is defined. */ +inline +void print_array(int argc, char** argv) +{ + int i, j; +#ifndef POLYBENCH_DUMP_ARRAYS + if (argc > 42 && ! strcmp(argv[0], "")) +#endif + { + for (i = 0; i < nx; i++) { + fprintf(stderr, "%0.2lf ", y[i]); + if (i%80 == 20) fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } +} + + +int main(int argc, char** argv) +{ + int i, j; + int nx = NX; + int ny = NY; + + /* Initialize array. */ + init_array(); + + /* Start timer. */ + polybench_start_instruments; + +#pragma scop +#pragma live-out + + + + +#pragma endscop + + /* Stop and print timer. */ + polybench_stop_instruments; + polybench_print_instruments; + + print_array(argc, argv); + + return 0; +} |

