summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM/memset-inline.ll
diff options
context:
space:
mode:
authorJohn Brawn <john.brawn@arm.com>2017-05-26 13:59:12 +0000
committerJohn Brawn <john.brawn@arm.com>2017-05-26 13:59:12 +0000
commit9009d2905debfc210859e5d20a1fb3ec319a9ce7 (patch)
tree93ad3b86753bdbce6d9657acf75739ae0d92f12d /llvm/test/CodeGen/ARM/memset-inline.ll
parentba9d8ba82aab927dd8e132c947cd64efff4e09d1 (diff)
downloadbcm5719-llvm-9009d2905debfc210859e5d20a1fb3ec319a9ce7.tar.gz
bcm5719-llvm-9009d2905debfc210859e5d20a1fb3ec319a9ce7.zip
[ARM] Fix lowering of misaligned memcpy/memset
Currently getOptimalMemOpType returns i32 for large enough sizes without checking for alignment, leading to poor code generation when misaligned accesses aren't permitted as we generate a word store then later split it up into byte stores. This means we inadvertently go over the MaxStoresPerMemcpy limit and for memset we splat the memset value into a word then immediately split it up again. Fix this by leaving it up to FindOptimalMemOpLowering to figure out which type to use, but also fix a bug there where it wasn't correctly checking if misaligned memory accesses are allowed. Differential Revision: https://reviews.llvm.org/D33442 llvm-svn: 303990
Diffstat (limited to 'llvm/test/CodeGen/ARM/memset-inline.ll')
-rw-r--r--llvm/test/CodeGen/ARM/memset-inline.ll50
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/memset-inline.ll b/llvm/test/CodeGen/ARM/memset-inline.ll
index e1d28b98f19..b2bd257701d 100644
--- a/llvm/test/CodeGen/ARM/memset-inline.ll
+++ b/llvm/test/CodeGen/ARM/memset-inline.ll
@@ -38,6 +38,56 @@ entry:
ret void
}
+define void @t3(i8* %p) { ; 4-byte memset at alignment 1 with a non-constant fill value, executed inside a loop
+entry: ; expectations below: 7A wants a muls feeding one str; 6M wants no muls and four strb of the same register
+; CHECK-7A-LABEL: t3:
+; CHECK-7A: muls [[REG:r[0-9]+]],
+; CHECK-7A: str [[REG]],
+; CHECK-6M-LABEL: t3:
+; CHECK-6M-NOT: muls
+; CHECK-6M: strb [[REG:r[0-9]+]],
+; CHECK-6M: strb [[REG]],
+; CHECK-6M: strb [[REG]],
+; CHECK-6M: strb [[REG]],
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; loop counter: 0 when entered from %entry, %inc on the back edge
+ %0 = trunc i32 %i to i8 ; low byte of the counter becomes the fill value, so it is not a compile-time constant
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) ; operands after the fill value: length 4, alignment 1, isvolatile false
+ call void @something(i8* %p) ; hands %p to the external function declared at the end of the file
+ %inc = add nuw nsw i32 %i, 1 ; next counter value
+ %exitcond = icmp eq i32 %inc, 255 ; true once %inc reaches 255
+ br i1 %exitcond, label %for.end, label %for.body ; exit the loop when %exitcond is true, otherwise iterate again
+
+for.end:
+ ret void
+}
+
+define void @t4(i8* %p) { ; 4-byte memset at alignment 2 with a non-constant fill value, executed inside a loop
+entry: ; expectations below: 7A wants a muls feeding one str; 6M wants a muls feeding two strh of the same register
+; CHECK-7A-LABEL: t4:
+; CHECK-7A: muls [[REG:r[0-9]+]],
+; CHECK-7A: str [[REG]],
+; CHECK-6M-LABEL: t4:
+; CHECK-6M: muls [[REG:r[0-9]+]],
+; CHECK-6M: strh [[REG]],
+; CHECK-6M: strh [[REG]],
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; loop counter: 0 when entered from %entry, %inc on the back edge
+ %0 = trunc i32 %i to i8 ; low byte of the counter becomes the fill value, so it is not a compile-time constant
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) ; operands after the fill value: length 4, alignment 2, isvolatile false
+ call void @something(i8* %p) ; hands %p to the external function declared at the end of the file
+ %inc = add nuw nsw i32 %i, 1 ; next counter value
+ %exitcond = icmp eq i32 %inc, 255 ; true once %inc reaches 255
+ br i1 %exitcond, label %for.end, label %for.body ; exit the loop when %exitcond is true, otherwise iterate again
+
+for.end:
+ ret void
+}
+
declare void @something(i8*) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
OpenPOWER on IntegriCloud