summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp9
-rw-r--r--llvm/test/Transforms/LoopIdiom/basic.ll75
2 files changed, 80 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 0451f774378..9ef5d81a66d 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -756,8 +756,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
- MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
- BECount, NegStride, /*IsLoopMemset=*/true);
+ MSI->getDestAlignment(), SplatValue, MSI, MSIs,
+ Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -1037,16 +1037,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
- unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must neccessarily be unordered
// by previous checks.
if (!SI->isAtomic() && !LI->isAtomic())
- NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
+ LoadBasePtr, LI->getAlignment(), NumBytes);
else {
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
+ unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
if (Align < StoreSize)
return false;
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index e16817fe364..7c491b357c7 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -28,6 +28,29 @@ for.end: ; preds = %for.body, %entry
; CHECK-NOT: store
}
+; Make sure memset is formed for larger than 1 byte stores, and that the
+; alignment of the store is preserved
+define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
+bb.nph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
+ store i16 0, i16* %I.0.014, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK-LABEL: @test1_i16(
+; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
+; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
+; CHECK-NOT: store
+}
+
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
@@ -169,6 +192,58 @@ for.end: ; preds = %for.body, %entry
; CHECK: ret void
}
+;; memcpy formation, check alignment
+define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load i32, i32* %I.0.014, align 1
+ store i32 %V, i32* %DestI, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK-LABEL: @test6_dest_align(
+; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
+; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
+; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+;; memcpy formation, check alignment
+define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ %V = load i32, i32* %I.0.014, align 4
+ store i32 %V, i32* %DestI, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK-LABEL: @test6_src_align(
+; CHECK: %[[Dst]] = bitcast i32* %Dest to i8*
+; CHECK: %[[Src]] = bitcast i32* %Base to i8*
+; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
; This is a loop that was rotated but where the blocks weren't merged. This
; shouldn't perturb us.
OpenPOWER on IntegriCloud