summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- polly/include/polly/CodeGen/BlockGenerators.h 12
-rw-r--r-- polly/lib/CodeGen/BlockGenerators.cpp 34
-rw-r--r-- polly/test/Isl/CodeGen/simple_vec_stride_one.ll 37
3 files changed, 83 insertions, 0 deletions
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index 9287717eaff..257361c8f4a 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -709,6 +709,18 @@ private:
bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
+ /// @brief Generate splatted vector loads for the scalar reads of a statement.
+ ///
+ /// @param Stmt The scop statement whose non-array read accesses are loaded.
+ /// @param VectorBlockMap A map that will be updated to relate the base
+ /// address of each such access to its new vector.
+ void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
+
+ /// @brief Verify absence of scalar stores, hitting llvm_unreachable if any.
+ ///
+ /// @param Stmt The scop statement whose memory accesses are checked.
+ void verifyNoScalarStores(ScopStmt &Stmt);
+
/// @param NewAccesses A map from memory access ids to new ast expressions,
/// which may contain new access expressions for certain
/// memory accesses.
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index b4364df7fda..0fe0f28df52 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -966,6 +966,36 @@ void VectorBlockGenerator::copyInstruction(
copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}
+// Reload each scalar read of Stmt and map its base address to a splat vector.
+void VectorBlockGenerator::generateScalarVectorLoads(
+    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
+  for (MemoryAccess *MA : Stmt) {
+    if (MA->isArrayKind() || MA->isWrite())
+      continue;
+    // Reload the scalar from its alloca as a vector (width argument 1).
+    auto *Address = getOrCreateAlloca(*MA);
+    Type *VectorPtrType = getVectorPtrTy(Address, 1);
+    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
+                                             Address->getName() + "_p_vec_p");
+    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
+    // An all-zero shuffle mask replicates element 0 into every vector lane.
+    Constant *SplatVector = Constant::getNullValue(
+        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
+    Value *VectorVal = Builder.CreateShuffleVector(
+        Val, Val, SplatVector, Address->getName() + "_p_splat");
+    VectorBlockMap[MA->getBaseAddr()] = VectorVal;
+  }
+}
+
+void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
+  // Any access that is neither an array access nor a read is unexpected here.
+  for (MemoryAccess *Access : Stmt) {
+    const bool IsUnexpected = !Access->isArrayKind() && !Access->isRead();
+    if (IsUnexpected)
+      llvm_unreachable("Scalar stores not expected in vector loop");
+  }
+}
+
void VectorBlockGenerator::copyStmt(
ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
@@ -994,8 +1024,12 @@ void VectorBlockGenerator::copyStmt(
VectorValueMapT ScalarBlockMap(getVectorWidth());
ValueMapT VectorBlockMap;
+ generateScalarVectorLoads(Stmt, VectorBlockMap);
+
for (Instruction &Inst : *BB)
copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);
+
+ verifyNoScalarStores(Stmt);
}
BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
diff --git a/polly/test/Isl/CodeGen/simple_vec_stride_one.ll b/polly/test/Isl/CodeGen/simple_vec_stride_one.ll
new file mode 100644
index 00000000000..de3f37127eb
--- /dev/null
+++ b/polly/test/Isl/CodeGen/simple_vec_stride_one.ll
@@ -0,0 +1,37 @@
+; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly \
+; RUN: < %s -S | FileCheck %s
+
+; CHECK: store <4 x double> %val.s2a_p_splat, <4 x double>* %vector_ptr
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @update_access_functions(i64 %arg, double* %A, double* %B) {
+bb3: ; Entry block; only branches into the first loop.
+ br label %loop1
+
+loop1: ; Stores 42.0 to A[0..3] (%indvar runs 0..3, exit when next == 4).
+ %indvar = phi i64 [ %indvar.next, %loop1 ], [ 0, %bb3 ]
+ %ptr1 = getelementptr inbounds double, double* %A, i64 %indvar
+ store double 42.0, double* %ptr1, align 8
+ %indvar.next = add nuw nsw i64 %indvar, 1
+ %cmp = icmp ne i64 %indvar.next, 4
+ br i1 %cmp, label %loop1, label %loop2
+
+loop2: ; Loads A[0..3] into %val, which is used after this loop.
+ %indvar.2 = phi i64 [ %indvar.2.next, %loop2 ], [ 0, %loop1 ]
+ %ptr2 = getelementptr inbounds double, double* %A, i64 %indvar.2
+ %val = load double, double* %ptr2, align 8 ; consumed by the store in loop3
+ %indvar.2.next = add nuw nsw i64 %indvar.2, 1
+ %cmp.2 = icmp ne i64 %indvar.2.next, 4
+ br i1 %cmp.2, label %loop2, label %loop3
+
+loop3: ; Stores %val to A[0..3]; presumably the loop the CHECK line targets.
+ %indvar.3 = phi i64 [ %indvar.3.next, %loop3 ], [ 0, %loop2 ]
+ %ptr3 = getelementptr inbounds double, double* %A, i64 %indvar.3
+ store double %val, double* %ptr3, align 8 ; %val is defined in loop2
+ %indvar.3.next = add nuw nsw i64 %indvar.3, 1
+ %cmp.3 = icmp ne i64 %indvar.3.next, 4
+ br i1 %cmp.3, label %loop3, label %exit
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud