diff options
| author | Tobias Grosser <tobias@grosser.es> | 2016-08-09 15:35:03 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2016-08-09 15:35:03 +0000 |
| commit | 750160e2605af4212727e254f863bbbbe0353174 (patch) | |
| tree | 67fb5aa892e43832ff1c9cfc570a859c3ec23ca6 | |
| parent | 776700d0b7cfa67e99d07af5cd9adf0ae7846fff (diff) | |
| download | bcm5719-llvm-750160e2605af4212727e254f863bbbbe0353174.tar.gz bcm5719-llvm-750160e2605af4212727e254f863bbbbe0353174.zip | |
[GPGPU] Use separate basic block for GPU initialization code
This increases the readability of the IR and also clarifies that the GPU
initialization is executed _after_ the scalar initialization, which needs
to happen before the code of the transformed scop is executed.
Besides increased readability, the IR should not change. Specifically, I
do not expect any changes in program semantics due to this patch.
llvm-svn: 278125
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 6 | ||||
| -rw-r--r-- | polly/test/GPGPU/double-parallel-loop.ll | 3 |
2 files changed, 9 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index d351e802ab2..32858fc213d 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "isl/union_map.h" @@ -436,6 +437,11 @@ private: }; void GPUNodeBuilder::initializeAfterRTH() { + BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(), + &*Builder.GetInsertPoint(), &DT, &LI); + NewBB->setName("polly.acc.initialize"); + Builder.SetInsertPoint(&NewBB->front()); + GPUContext = createCallInitContext(); allocateDeviceArrays(); } diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll index 994641774dc..da4a7fc0df8 100644 --- a/polly/test/GPGPU/double-parallel-loop.ll +++ b/polly/test/GPGPU/double-parallel-loop.ll @@ -92,6 +92,9 @@ ; IR-NEXT: br i1 true, label %polly.start, label %bb2 ; IR: polly.start: +; IR-NEXT: br label %polly.acc.initialize + +; IR: polly.acc.initialize: ; IR-NEXT: [[GPUContext:%.*]] = call i8* @polly_initContext() ; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304) ; IR-NEXT: [[HostPtr:%.*]] = bitcast [1024 x float]* %A to i8* |

