From 5c91fb902d4e6f6006faf45edd3f25932cb7d58c Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Sat, 17 Nov 2007 23:46:58 +0800 Subject: Blackfin arch: Add assembly function insl_16 /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the * hardware handles it or large if we have to take an exception and fix it * in software. * * Since an ethernet header is 14 bytes network drivers often end up with * the IP header at an unaligned offset. The IP header can be aligned by * shifting the start of the packet by 2 bytes. Drivers should do this * with: * * skb_reserve(NET_IP_ALIGN); * * The downside to this alignment of the IP header is that the DMA is now * unaligned. On some architectures the cost of an unaligned DMA is high * and this cost outweighs the gains made by aligning the IP header. * * Since this trade off varies between architectures, we allow NET_IP_ALIGN * to be overridden. */ This new function insl_16 reads from 32-bit IO and writes to 16-bit aligned memory. This is useful in the scenario described above, in particular with the ASIX AX88180 Gigabit Ethernet MAC. Once the device is in 32-bit mode, reads from the RX FIFO always decrement it by 4 bytes, while on the other side the destination address in SDRAM is always 16-bit aligned. If we use skb_reserve(0) the receive buffer is 32-bit aligned, but later we hit an unaligned exception in the IP code.
Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- arch/blackfin/lib/ins.S | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/blackfin/lib/ins.S') diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S index a17cc77ac36f..df7b8833a0c5 100644 --- a/arch/blackfin/lib/ins.S +++ b/arch/blackfin/lib/ins.S @@ -77,3 +77,22 @@ ENTRY(_insb) sti R3; RTS; ENDPROC(_insb) + + +/* insl_16: read count 32-bit words from a single IO port and store each one as two 16-bit halves, low half first, so the destination only has to be 16-bit aligned. In: R0 = port, R1 = address, R2 = count (number of 32-bit reads, 4 bytes stored per read). Overwrites R0, R3, P0, P1, P2 and the loop-0 registers. Runs with interrupts disabled from the cli to the sti. */ +ENTRY(_insl_16) + P0 = R0; /* P0 = port (source of every 32-bit read, never incremented) */ + cli R3; /* disable interrupts and keep the previous mask in R3 for the sti below */ + P1 = R1; /* P1 = address (destination, advanced by 2 bytes per store) */ + P2 = R2; /* P2 = count (loop iterations, one 32-bit port read each) */ + SSYNC; /* NOTE(review): presumably drains outstanding bus activity before the first port read, confirm */ + LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2; /* hardware loop over loop_s..loop_e with LC0 = count. NOTE(review): there is no explicit guard for count == 0, confirm what LSETUP does with LC0 = 0 */ +.Llong16_loop_s: R0 = [P0]; /* one 32-bit read from the port */ + W[P1++] = R0; /* store the low 16 bits, then P1 advances by 2 */ + R0 = R0 >> 16; /* bring the high 16 bits down into the low half */ + W[P1++] = R0; /* store the former high 16 bits, then P1 advances by 2 */ + NOP; /* NOTE(review): reason for the two trailing NOPs is not stated here, presumably loop-end padding or timing, confirm before removing */ +.Llong16_loop_e: NOP; /* last instruction of the hardware loop body */ + sti R3; /* restore the interrupt mask saved by cli */ + RTS; /* return to caller */ +ENDPROC(_insl_16) -- cgit v1.2.1