summary | refs | log | tree | commit | diff | stats
path: root/arch/s390
diff options
context:
space:
mode:
author: Heiko Carstens <heiko.carstens@de.ibm.com> 2017-10-04 19:27:08 +0200
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2017-10-09 11:18:07 +0200
commit: 993fef95b9c1858894d14b221e04f1161e4f4ed9 (patch)
tree: b0c768f991a44cf09d660c322fec49ce36753742 /arch/s390
parent: 41879ff65d8b025eace44610be0b07f678fb3224 (diff)
download: blackbird-op-linux-993fef95b9c1858894d14b221e04f1161e4f4ed9.tar.gz
          blackbird-op-linux-993fef95b9c1858894d14b221e04f1161e4f4ed9.zip
s390: optimize memset implementation
As with the memset16/32/64 variants, avoid having subsequent mvc instructions depend on each other, since that might have a negative performance impact.

This patch is currently hardly relevant, since at least gcc 7.1 generates only inline memset code and not a single memset call. However, there is no reason not to provide an optimized version, just in case gcc generates memset calls again, as it did in the past.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- arch/s390/lib/mem.S 20
1 files changed, 12 insertions, 8 deletions
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index f88cf6983849..9255a087fa96 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -78,21 +78,25 @@ ENTRY(memset)
ex %r4,0(%r3)
br %r14
.Lmemset_fill:
- stc %r3,0(%r2)
cghi %r4,1
lgr %r1,%r2
- ber %r14
+ je .Lmemset_fill_exit
aghi %r4,-2
- srlg %r3,%r4,8
- ltgr %r3,%r3
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
jz .Lmemset_fill_remainder
.Lmemset_fill_loop:
- mvc 1(256,%r1),0(%r1)
+ stc %r3,0(%r1)
+ mvc 1(255,%r1),0(%r1)
la %r1,256(%r1)
- brctg %r3,.Lmemset_fill_loop
+ brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
- larl %r3,.Lmemset_mvc
- ex %r4,0(%r3)
+ stc %r3,0(%r1)
+ larl %r5,.Lmemset_mvc
+ ex %r4,0(%r5)
+ br %r14
+.Lmemset_fill_exit:
+ stc %r3,0(%r1)
br %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)
OpenPOWER on IntegriCloud