diff options
| author | John Brawn <john.brawn@arm.com> | 2017-05-26 13:59:12 +0000 |
|---|---|---|
| committer | John Brawn <john.brawn@arm.com> | 2017-05-26 13:59:12 +0000 |
| commit | 9009d2905debfc210859e5d20a1fb3ec319a9ce7 (patch) | |
| tree | 93ad3b86753bdbce6d9657acf75739ae0d92f12d /llvm/test/CodeGen/ARM/memset-inline.ll | |
| parent | ba9d8ba82aab927dd8e132c947cd64efff4e09d1 (diff) | |
| download | bcm5719-llvm-9009d2905debfc210859e5d20a1fb3ec319a9ce7.tar.gz bcm5719-llvm-9009d2905debfc210859e5d20a1fb3ec319a9ce7.zip | |
[ARM] Fix lowering of misaligned memcpy/memset
Currently getOptimalMemOpType returns i32 for large enough sizes without
checking for alignment, leading to poor code generation when misaligned accesses
aren't permitted as we generate a word store then later split it up into byte
stores. This means we inadvertantly go over the MaxStoresPerMemcpy limit and for
memset we splat the memset value into a word then immediately split it up
again.
Fix this by leaving it up to FindOptimalMemOpLowering to figure out which type
to use, but also fix a bug there where it wasn't correctly checking if
misaligned memory accesses are allowed.
Differential Revision: https://reviews.llvm.org/D33442
llvm-svn: 303990
Diffstat (limited to 'llvm/test/CodeGen/ARM/memset-inline.ll')
| -rw-r--r-- | llvm/test/CodeGen/ARM/memset-inline.ll | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/memset-inline.ll b/llvm/test/CodeGen/ARM/memset-inline.ll index e1d28b98f19..b2bd257701d 100644 --- a/llvm/test/CodeGen/ARM/memset-inline.ll +++ b/llvm/test/CodeGen/ARM/memset-inline.ll @@ -38,6 +38,56 @@ entry: ret void } +define void @t3(i8* %p) { +entry: +; CHECK-7A-LABEL: t3: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t3: +; CHECK-6M-NOT: muls +; CHECK-6M: strb [[REG:r[0-9]+]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @t4(i8* %p) { +entry: +; CHECK-7A-LABEL: t4: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t4: +; CHECK-6M: muls [[REG:r[0-9]+]], +; CHECK-6M: strh [[REG]], +; CHECK-6M: strh [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + declare void @something(i8*) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind |

