summary refs log tree commit diff stats
path: root/llvm/test
diff options
context:
space:
mode:
author Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-08-27 09:54:29 +0000
committer Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-08-27 09:54:29 +0000
commit 5e318f0bfeb6276a1db0656a9719d1de520f1ed5 (patch)
tree 9b22e856cfaa20310244f577ce12a13b1c604dc4 /llvm/test
parent 70835f6025a9a539c38c25389cd0ee959f2ab20b (diff)
download bcm5719-llvm-5e318f0bfeb6276a1db0656a9719d1de520f1ed5.tar.gz
bcm5719-llvm-5e318f0bfeb6276a1db0656a9719d1de520f1ed5.zip
[SystemZ] Extend memcpy and memset support to all constant lengths
Lengths up to a certain threshold (currently 6 * 256) use a series of MVCs. Lengths above that threshold use a loop to handle X*256 bytes followed by a single MVC to handle the excess (if any). This loop will also be needed in future when support for variable lengths is added.

Because the same tablegen classes are used to define MVC and CLC, the patch also has the side-effect of defining a pseudo loop instruction for CLC. That instruction isn't used yet (and wouldn't be handled correctly if it were). I'm planning to use it soon though.

llvm-svn: 189331
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/SystemZ/memcpy-01.ll 163
-rw-r--r-- llvm/test/CodeGen/SystemZ/memset-01.ll 48
-rw-r--r-- llvm/test/CodeGen/SystemZ/memset-02.ll 14
-rw-r--r-- llvm/test/CodeGen/SystemZ/memset-03.ll 14
-rw-r--r-- llvm/test/CodeGen/SystemZ/memset-04.ll 14
5 files changed, 224 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/SystemZ/memcpy-01.ll b/llvm/test/CodeGen/SystemZ/memcpy-01.ll
index 7cb58b31cce..b53ec5452e2 100644
--- a/llvm/test/CodeGen/SystemZ/memcpy-01.ll
+++ b/llvm/test/CodeGen/SystemZ/memcpy-01.ll
@@ -4,7 +4,9 @@
declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind
+declare void @foo(i8 *, i8 *)
+; Test a no-op move, i32 version.
define void @f1(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
@@ -15,6 +17,7 @@ define void @f1(i8 *%dest, i8 *%src) {
ret void
}
+; Test a no-op move, i64 version.
define void @f2(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
@@ -25,6 +28,7 @@ define void @f2(i8 *%dest, i8 *%src) {
ret void
}
+; Test a 1-byte move, i32 version.
define void @f3(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
@@ -34,6 +38,7 @@ define void @f3(i8 *%dest, i8 *%src) {
ret void
}
+; Test a 1-byte move, i64 version.
define void @f4(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
@@ -43,6 +48,7 @@ define void @f4(i8 *%dest, i8 *%src) {
ret void
}
+; Test the upper range of a single MVC, i32 version.
define void @f5(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
@@ -52,6 +58,7 @@ define void @f5(i8 *%dest, i8 *%src) {
ret void
}
+; Test the upper range of a single MVC, i64 version.
define void @f6(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
@@ -61,22 +68,168 @@ define void @f6(i8 *%dest, i8 *%src) {
ret void
}
-; 257 bytes is too big for a single MVC. For now expect none, so that
-; the test fails and gets updated when large copies are implemented.
+; Test the first case that needs two MVCs.
define void @f7(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f7:
-; CHECK-NOT: mvc
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
i1 false)
ret void
}
+; Test the last-but-one case that needs two MVCs.
define void @f8(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f8:
-; CHECK-NOT: mvc
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(255,%r2), 256(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the last case that needs two MVCs.
+define void @f9(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f9:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test an arbitrary value that uses straight-line code.
+define void @f10(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f10:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(255,%r2), 1024(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again in cases where not all parts are in range of MVC, so that LAY is needed to form new destination and source bases.
+define void @f11(i8 *%destbase, i8 *%srcbase) {
+; CHECK-LABEL: f11:
+; CHECK: mvc 4000(256,%r2), 3500(%r3)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
+; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
+; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
+; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
+; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
+; CHECK: br %r14
+ %dest = getelementptr i8 *%destbase, i64 4000
+ %src = getelementptr i8 *%srcbase, i64 3500
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again with a destination frame base that goes out of range.
+define void @f12() {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 4076(256,%r15), 2100(%r15)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
+; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
+; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
+; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
+; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %arr = alloca [6000 x i8]
+ %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900
+ %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
+ ret void
+}
+
+; ...and again with a source frame base that goes out of range.
+define void @f13() {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 200(256,%r15), 3826(%r15)
+; CHECK: mvc 456(256,%r15), 4082(%r15)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
+; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
+; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
+; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %arr = alloca [6000 x i8]
+ %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24
+ %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
+ ret void
+}
+
+; Test the last case that is done using straight-line code.
+define void @f14(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f14:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(256,%r2), 1024(%r3)
+; CHECK: mvc 1280(256,%r2), 1280(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the first case that is done using a loop.
+define void @f15(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f15:
+; CHECK: lghi [[COUNT:%r[0-5]]], 6
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 768(%r2)
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: la %r2, 256(%r2)
+; CHECK: la %r3, 256(%r3)
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again with frame bases, where the base must be loaded into a
+; register before the loop.
+define void @f16() {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
+; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 2368([[BASE]])
+; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
+; CHECK: la [[BASE]], 256([[BASE]])
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
+; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
- call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 257, i32 1,
+ %arr = alloca [3200 x i8]
+ %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600
+ %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
ret void
}
diff --git a/llvm/test/CodeGen/SystemZ/memset-01.ll b/llvm/test/CodeGen/SystemZ/memset-01.ll
index b272a5bcc69..f17901cc73a 100644
--- a/llvm/test/CodeGen/SystemZ/memset-01.ll
+++ b/llvm/test/CodeGen/SystemZ/memset-01.ll
@@ -103,22 +103,58 @@ define void @f10(i8 *%dest, i8 %val) {
ret void
}
-; 258 bytes, i32 version. 258 bytes is too big for a single MVC.
-; For now expect none, so that the test fails and gets updated when
-; large copies are implemented.
+; 258 bytes, i32 version. We need two MVCs.
define void @f11(i8 *%dest, i8 %val) {
; CHECK-LABEL: f11:
-; CHECK-NOT: mvc
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i32(i8 *%dest, i8 %val, i32 258, i32 1, i1 false)
ret void
}
-; 258 bytes, i64 version, with the same comments as above.
+; 258 bytes, i64 version.
define void @f12(i8 *%dest, i8 %val) {
; CHECK-LABEL: f12:
-; CHECK-NOT: mvc
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 258, i32 1, i1 false)
ret void
}
+
+; Test the largest case for which straight-line code is used.
+define void @f13(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f13:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(256,%r2), 256(%r2)
+; CHECK: mvc 513(256,%r2), 512(%r2)
+; CHECK: mvc 769(256,%r2), 768(%r2)
+; CHECK: mvc 1025(256,%r2), 1024(%r2)
+; CHECK: mvc 1281(256,%r2), 1280(%r2)
+; CHECK: br %r14
+ call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1537, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the next size up, which uses a loop. We leave the other corner
+; cases to memcpy-01.ll.
+define void @f14(i8 *%dest, i8 %val) {
+; CHECK-LABEL: f14:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: lghi [[COUNT:%r[0-5]]], 6
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 769(%r2)
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: la %r2, 256(%r2)
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 1(1,%r2), 0(%r2)
+; CHECK: br %r14
+ call void @llvm.memset.p0i8.i64(i8 *%dest, i8 %val, i64 1538, i32 1,
+ i1 false)
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/memset-02.ll b/llvm/test/CodeGen/SystemZ/memset-02.ll
index b74d907aa9a..b4724c0b574 100644
--- a/llvm/test/CodeGen/SystemZ/memset-02.ll
+++ b/llvm/test/CodeGen/SystemZ/memset-02.ll
@@ -139,21 +139,23 @@ define void @f14(i8 *%dest) {
ret void
}
-; 258 bytes, i32 version. 258 bytes is too big for a single MVC.
-; For now expect none, so that the test fails and gets updated when
-; large copies are implemented.
+; 258 bytes, i32 version. We need two MVCs.
define void @f15(i8 *%dest) {
; CHECK-LABEL: f15:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i32(i8 *%dest, i8 128, i32 258, i32 1, i1 false)
ret void
}
-; 258 bytes, i64 version, with the same comments as above.
+; 258 bytes, i64 version.
define void @f16(i8 *%dest) {
; CHECK-LABEL: f16:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 128
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i64(i8 *%dest, i8 128, i64 258, i32 1, i1 false)
ret void
diff --git a/llvm/test/CodeGen/SystemZ/memset-03.ll b/llvm/test/CodeGen/SystemZ/memset-03.ll
index 1d48f1ad6dc..3f954c4f79f 100644
--- a/llvm/test/CodeGen/SystemZ/memset-03.ll
+++ b/llvm/test/CodeGen/SystemZ/memset-03.ll
@@ -375,21 +375,23 @@ define void @f38(i8 *%dest) {
ret void
}
-; 258 bytes, i32 version. 258 bytes is too big for a single MVC.
-; For now expect none, so that the test fails and gets updated when
-; large copies are implemented.
+; 258 bytes, i32 version. We need two MVCs.
define void @f39(i8 *%dest) {
; CHECK-LABEL: f39:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 0
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i32(i8 *%dest, i8 0, i32 258, i32 1, i1 false)
ret void
}
-; 258 bytes, i64 version, with the same comments as above.
+; 258 bytes, i64 version.
define void @f40(i8 *%dest) {
; CHECK-LABEL: f40:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 0
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i64(i8 *%dest, i8 0, i64 258, i32 1, i1 false)
ret void
diff --git a/llvm/test/CodeGen/SystemZ/memset-04.ll b/llvm/test/CodeGen/SystemZ/memset-04.ll
index 92886921b07..7906e8d10a1 100644
--- a/llvm/test/CodeGen/SystemZ/memset-04.ll
+++ b/llvm/test/CodeGen/SystemZ/memset-04.ll
@@ -375,21 +375,23 @@ define void @f38(i8 *%dest) {
ret void
}
-; 258 bytes, i32 version. 258 bytes is too big for a single MVC.
-; For now expect none, so that the test fails and gets updated when
-; large copies are implemented.
+; 258 bytes, i32 version. We need two MVCs.
define void @f39(i8 *%dest) {
; CHECK-LABEL: f39:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i32(i8 *%dest, i8 -1, i32 258, i32 1, i1 false)
ret void
}
-; 258 bytes, i64 version, with the same comments as above.
+; 258 bytes, i64 version.
define void @f40(i8 *%dest) {
; CHECK-LABEL: f40:
-; CHECK-NOT: mvc
+; CHECK: mvi 0(%r2), 255
+; CHECK: mvc 1(256,%r2), 0(%r2)
+; CHECK: mvc 257(1,%r2), 256(%r2)
; CHECK: br %r14
call void @llvm.memset.p0i8.i64(i8 *%dest, i8 -1, i64 258, i32 1, i1 false)
ret void
OpenPOWER on IntegriCloud