diff options
-rw-r--r-- | polly/include/polly/CodeGen/BlockGenerators.h | 12 | ||||
-rw-r--r-- | polly/lib/CodeGen/BlockGenerators.cpp | 37 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/simple_vec_stride_one.ll | 37 |
3 files changed, 86 insertions, 0 deletions
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index 9287717eaff..257361c8f4a 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -709,6 +709,18 @@ private:
 
   bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
 
+  /// @brief Generate vector loads for scalars.
+  ///
+  /// @param Stmt           The scop statement for which to generate the loads.
+  /// @param VectorBlockMap A map that will be updated to relate the original
+  ///                       values with the newly generated vector loads.
+  void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
+
+  /// @brief Verify absence of scalar stores.
+  ///
+  /// @param Stmt The scop statement to check for scalar stores.
+  void verifyNoScalarStores(ScopStmt &Stmt);
+
   /// @param NewAccesses A map from memory access ids to new ast expressions,
   ///                    which may contain new access expressions for certain
   ///                    memory accesses.
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index b4364df7fda..0fe0f28df52 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -966,6 +966,39 @@ void VectorBlockGenerator::copyInstruction(
   copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
 }
 
+void VectorBlockGenerator::generateScalarVectorLoads(
+    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
+  for (MemoryAccess *MA : Stmt) {
+    // Only non-array read accesses get a vector reload.
+    if (MA->isArrayKind() || MA->isWrite())
+      continue;
+
+    auto *Address = getOrCreateAlloca(*MA);
+    Type *VectorPtrType = getVectorPtrTy(Address, 1);
+    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
+                                             Address->getName() + "_p_vec_p");
+    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
+    // An all-zero shuffle mask picks element 0 for every result lane, so the
+    // shuffle below broadcasts the reloaded value to getVectorWidth() lanes.
+    Constant *SplatVector = Constant::getNullValue(
+        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
+
+    Value *VectorVal = Builder.CreateShuffleVector(
+        Val, Val, SplatVector, Address->getName() + "_p_splat");
+    VectorBlockMap[MA->getBaseAddr()] = VectorVal;
+  }
+}
+
+void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
+  for (MemoryAccess *MA : Stmt) {
+    if (MA->isArrayKind() || MA->isRead())
+      continue;
+
+    // What is left is neither an array access nor a read.
+    llvm_unreachable("Scalar stores not expected in vector loop");
+  }
+}
+
 void VectorBlockGenerator::copyStmt(
     ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
   assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
@@ -994,8 +1027,12 @@ void VectorBlockGenerator::copyStmt(
   VectorValueMapT ScalarBlockMap(getVectorWidth());
   ValueMapT VectorBlockMap;
 
+  generateScalarVectorLoads(Stmt, VectorBlockMap);
+
   for (Instruction &Inst : *BB)
     copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);
+
+  verifyNoScalarStores(Stmt);
 }
 
 BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
diff --git a/polly/test/Isl/CodeGen/simple_vec_stride_one.ll b/polly/test/Isl/CodeGen/simple_vec_stride_one.ll
new file mode 100644
index 00000000000..de3f37127eb
--- /dev/null
+++ b/polly/test/Isl/CodeGen/simple_vec_stride_one.ll
@@ -0,0 +1,37 @@
+; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly \
+; RUN: < %s -S | FileCheck %s
+
+; CHECK: store <4 x double> %val.s2a_p_splat, <4 x double>* %vector_ptr
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @update_access_functions(i64 %arg, double* %A, double* %B) {
+bb3:
+  br label %loop1
+
+loop1:
+  %indvar = phi i64 [ %indvar.next, %loop1 ], [ 0, %bb3 ]
+  %ptr1 = getelementptr inbounds double, double* %A, i64 %indvar
+  store double 42.0, double* %ptr1, align 8
+  %indvar.next = add nuw nsw i64 %indvar, 1
+  %cmp = icmp ne i64 %indvar.next, 4
+  br i1 %cmp, label %loop1, label %loop2
+
+loop2:
+  %indvar.2 = phi i64 [ %indvar.2.next, %loop2 ], [ 0, %loop1 ]
+  %ptr2 = getelementptr inbounds double, double* %A, i64 %indvar.2
+  %val = load double, double* %ptr2, align 8
+  %indvar.2.next = add nuw nsw i64 %indvar.2, 1
+  %cmp.2 = icmp ne i64 %indvar.2.next, 4
+  br i1 %cmp.2, label %loop2, label %loop3
+
+loop3:
+  %indvar.3 = phi i64 [ %indvar.3.next, %loop3 ], [ 0, %loop2 ]
+  %ptr3 = getelementptr inbounds double, double* %A, i64 %indvar.3
+  store double %val, double* %ptr3, align 8
+  %indvar.3.next = add nuw nsw i64 %indvar.3, 1
+  %cmp.3 = icmp ne i64 %indvar.3.next, 4
+  br i1 %cmp.3, label %loop3, label %exit
+
+exit:
+  ret void
+}