summaryrefslogtreecommitdiffstats
path: root/arch/blackfin/lib/muldi3.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-01-07 12:00:25 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-07 12:00:25 -0800
commit5bb47b9ff3d16d40f8d45380b373497a545fa280 (patch)
treee13dd34395473342dc75eff5cbaf5b1ea753631c /arch/blackfin/lib/muldi3.S
parent2f2408a88cf8fa43febfd7fb5783e61b2937b0f9 (diff)
parent06af15e086e39a5a2a2413973a64af8e10122f28 (diff)
downloadtalos-obmc-linux-5bb47b9ff3d16d40f8d45380b373497a545fa280.tar.gz
talos-obmc-linux-5bb47b9ff3d16d40f8d45380b373497a545fa280.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/blackfin-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/blackfin-2.6: (171 commits) Blackfin arch: fix bug - BF527 0.2 silicon has different CPUID (DSPID) value Blackfin arch: Enlarge flash partition for kenel for bf533/bf537 boards Blackfin arch: fix bug: kernel crash when enable SDIO host driver Blackfin arch: Print FP at level KERN_NOTICE Blackfin arch: drop ad73311 test code Blackfin arch: update board default configs Blackfin arch: Set PB4 as the default irq for bf548 board v1.4+. Blackfin arch: fix typo in early printk bit size processing Blackfin arch: enable reprogram cclk and sclk for bf518f-ezbrd Blackfin arch: add SDIO host driver platform data Blackfin arch: fix bug - kernel stops at initial console Blackfin arch: fix bug - kernel crash after config IP for ethernet port Blackfin arch: add sdh support for bf518f-ezbrd Blackfin arch: fix bug - kernel detects BF532 incorrectly Blackfin arch: add () to avoid warnings from gcc Blackfin arch: change HWTRACE Kconfig and set it on default Blackfin arch: Clean oprofile build path for blackfin Blackfin arch: remove hardware PM code, oprofile not use it Blackfin arch: rewrite get_sclk()/get_vco() Blackfin arch: cleanup and unify the ins functions ...
Diffstat (limited to 'arch/blackfin/lib/muldi3.S')
-rw-r--r--arch/blackfin/lib/muldi3.S68
1 files changed, 68 insertions, 0 deletions
diff --git a/arch/blackfin/lib/muldi3.S b/arch/blackfin/lib/muldi3.S
new file mode 100644
index 000000000000..abde120ee230
--- /dev/null
+++ b/arch/blackfin/lib/muldi3.S
@@ -0,0 +1,68 @@
+.align 2
+.global ___muldi3;
+.type ___muldi3, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+/*
+ R1:R0 * R3:R2
+ = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
+[X] = (R1.h * R3.h) * 2^96
+[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
+[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
+[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
+[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
+[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
+[T4] + (R0.l * R2.l)
+
+ We can discard the first three lines marked "X" since we produce
+ only a 64 bit result. So, we need ten 16-bit multiplies.
+
+ Individual mul-acc results:
+[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
+[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
+[E3] = R0.l * R2.h + R2.l * R0.h
+[E4] = R0.l * R2.l
+
+ We also need to add high parts from lower-level results to higher ones:
+ E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
+
+ One interesting property is that all parts of the result that depend
+ on the sign of the multiplication are discarded. Those would be the
+ multiplications involving R1.h and R3.h, but only the top 16 bit of
+ the 32 bit result depend on the sign, and since R1.h and R3.h only
+ occur in E1, the top half of these results is cut off.
+ So, we can just use FU mode for all of the 16-bit multiplies, and
+ ignore questions of when to use mixed mode. */
+
+___muldi3:
+ /* [SP] technically is part of the caller's frame, but we can
+ use it as scratch space. */
+ A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
+ A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
+ A0 += A1; /* E1 */
+ R4 = A0.w;
+ A0 = R0.l * R3.l (FU); /* E2 */
+ A0 += R2.l * R1.l (FU); /* E2 */
+
+ A1 = R2.L * R0.L (FU); /* E4 */
+ R3 = A1.w;
+ A1 = A1 >> 16; /* E3c */
+ A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
+ A1 += R0.L * R2.H (FU); /* E3c */
+ R0 = A1.w;
+ A1 = A1 >> 16; /* E2c */
+ A0 += A1; /* E2c */
+ R1 = A0.w;
+
+ /* low(result) = low(E3c):low(E4) */
+ R0 = PACK (R0.l, R3.l);
+ /* high(result) = E2c + (E1 << 16) */
+ R1.h = R1.h + R4.l (NS) || R4 = [SP];
+ RTS;
+
+.size ___muldi3, .-___muldi3
OpenPOWER on IntegriCloud