From b6cf523c16e148e1ece5fc4ff4657d79323a4635 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Oct 2018 11:47:55 +1100 Subject: m68k: Unroll raw_outsb() loop Unroll the raw_outsb() loop using the optimized assembler code from raw_outsw(). That code is copied and pasted, with movew changed to moveb. This improves the performance of sequential write transfers using mac_esp in PIO mode by 5% or 10%. (The DMA controller on the 840av/660av models is still unsupported so PIO transfers are used.) Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/raw_io.h | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'arch/m68k/include') diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 85761255dde5..8a6dc6e5a279 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -107,12 +107,43 @@ static inline void raw_insb(volatile u8 __iomem *port, u8 *buf, unsigned int len } static inline void raw_outsb(volatile u8 __iomem *port, const u8 *buf, - unsigned int len) + unsigned int nr) { - unsigned int i; + unsigned int tmp; - for (i = 0; i < len; i++) - out_8(port, *buf++); + if (nr & 15) { + tmp = (nr & 15) - 1; + asm volatile ( + "1: moveb %0@+,%2@; dbra %1,1b" + : "=a" (buf), "=d" (tmp) + : "a" (port), "0" (buf), + "1" (tmp)); + } + if (nr >> 4) { + tmp = (nr >> 4) - 1; + asm volatile ( + "1: " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "moveb %0@+,%2@; " + "dbra %1,1b" + : "=a" (buf), "=d" (tmp) + : "a" (port), "0" (buf), + "1" (tmp)); + } } static inline void raw_insw(volatile u16 __iomem *port, u16 *buf, unsigned int nr) -- cgit v1.2.1