summaryrefslogtreecommitdiffstats
path: root/polly/test/GPGPU/host-statement.ll
diff options
context:
space:
mode:
Diffstat (limited to 'polly/test/GPGPU/host-statement.ll')
-rw-r--r--polly/test/GPGPU/host-statement.ll12
1 files changed, 7 insertions, 5 deletions
diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll
index ded1d5834a8..73b0ea589e7 100644
--- a/polly/test/GPGPU/host-statement.ll
+++ b/polly/test/GPGPU/host-statement.ll
@@ -1,8 +1,10 @@
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -polly-invariant-load-hoisting=false \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -polly-invariant-load-hoisting=false \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=KERNEL-IR %s
@@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; This test case tests that we can correctly handle a ScopStmt that is
; scheduled on the host, instead of within a kernel.
-; CODE: Code
+; CODE-LABEL: Code
; CODE-NEXT: ====
; CODE-NEXT: # host
; CODE-NEXT: {
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(16);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE: {
; CODE-NEXT: dim3 k2_dimBlock(16, 32);
; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
-; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
+; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: }
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: if (p_0 <= 510 && p_1 <= 510)
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
; CODE-NEXT: Stmt_for_cond33_preheader();
OpenPOWER on IntegriCloud