summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile16
-rw-r--r--cf-code/cf-fsi-fw.S489
-rw-r--r--cf-code/cf-fsi-palmetto.h16
-rw-r--r--cf-code/cf-fsi-romulus.h12
-rw-r--r--cf-code/cf-fsi-shared.h3
-rw-r--r--cf-code/cf-fsi-split.h1
-rw-r--r--cf-code/cf-fsi-witherspoon.h12
-rw-r--r--cf-fsi-fw.h28
-rw-r--r--cf-fsi-test.c106
-rwxr-xr-xdist-bin/cf-fsi-palmetto.binbin3456 -> 0 bytes
-rwxr-xr-xdist-bin/cf-fsi-romulus.binbin3456 -> 0 bytes
-rwxr-xr-xdist-bin/cf-fsi-witherspoon.binbin3472 -> 0 bytes
12 files changed, 454 insertions, 229 deletions
diff --git a/Makefile b/Makefile
index e6e64af..33f6d07 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ TARGETS_bin = $(patsubst %.h,%.bin,$(TARGET_DEFS))
FW_SOURCE = cf-code/cf-fsi-fw.S
FW_DEPS = $(FW_SOURCE) cf-fsi-fw.h
-all: $(TARGETS_bin) cf-fsi-test-rom cf-fsi-test-palm
+all: cf-fsi-fw.bin cf-fsi-test-rom cf-fsi-test-palm
cf-code/%.s : cf-code/%.h $(FW_DEPS)
$(CC) -E $(M68KCPPFLAGS) -I. -include $< $(FW_SOURCE) -o $@
@@ -44,21 +44,21 @@ cf-code/%.elf : cf-code/%.o
cf-code/%.bin : cf-code/%.elf
$(M68KOC) -O binary $^ $@
-cf-wrapper-rom.o : cf-wrapper.S cf-code/cf-fsi-romulus.bin
- $(CC) $(CFLAGS) -DCF_FILE="cf-code/cf-fsi-romulus.bin" -c cf-wrapper.S -o $@
+cf-fsi-fw.bin : $(TARGETS_bin)
+ cat $^ >$@
-cf-wrapper-palm.o : cf-wrapper.S cf-code/cf-fsi-palmetto.bin
- $(CC) $(CFLAGS) -DCF_FILE="cf-code/cf-fsi-palmetto.bin" -c cf-wrapper.S -o $@
+cf-wrapper.o : cf-fsi-fw.bin cf-wrapper.S
+ $(CC) $(CFLAGS) -DCF_FILE=$< -c cf-wrapper.S -o $@
-cf-fsi-test-rom : cf-fsi-test.c cf-wrapper-rom.o
+cf-fsi-test-rom : cf-fsi-test.c cf-wrapper.o
$(CC) $(CFLAGS) -DROMULUS $^ -o $@
-cf-fsi-test-palm : cf-fsi-test.c cf-wrapper-palm.o
+cf-fsi-test-palm : cf-fsi-test.c cf-wrapper.o
$(CC) $(CFLAGS) -O0 -mcpu=arm926ej-s -DPALMETTO $^ -o $@
# Keep the ELF for debugging
.PRECIOUS : cf-code/%.elf
clean:
- rm -rf cf-fsi-test-* *.o *.s
+ rm -rf cf-fsi-test-* *.o *.s *.bin
rm -rf cf-code/*.elf cf-code/*.bin cf-code/*.s cf-code/*.o
diff --git a/cf-code/cf-fsi-fw.S b/cf-code/cf-fsi-fw.S
index c0ca927..adcf964 100644
--- a/cf-code/cf-fsi-fw.S
+++ b/cf-code/cf-fsi-fw.S
@@ -6,6 +6,8 @@
.equ SRAM_BASE_LE, 0x720000
.equ GPIO_BASE, 0x780000
.equ CVIC_BASE, 0x6c2000
+ .equ IFLUSH_BASE, 0x008000
+ .equ DFLUSH_BASE, 0x008004
.equ STACK_SIZE, 0x100 /* 256 bytes of stack is enough */
@@ -21,7 +23,7 @@
*
* A0 : sratch/temp
* A1 : SRAM base (BE)
- * A2: CVIC address.
+ * A2: GPIO block base
* A3: TRACEBUF
* A4 : Data GPIO address
* A5 : Clock GPIO address
@@ -31,10 +33,10 @@
* D6 : data/trans GPIO cache
* D5 : clock bit number
* D4 : data value
- * D3 : loop counter
+ * D3 : loop counter or scratch
* D2 : command register
- * D1 : scratch/temp
- * D0 : scratch/temp
+ * D1 : data bit number or scratch/temp
+ * D0 : FW control bits or scratch/temp
*/
/*
@@ -44,24 +46,87 @@
*/
#define DCLK d7
-#if CLOCK_GPIO_VREG == DATA_GPIO_VREG
+#ifdef DCLK_DDAT_SHARED
#define DDAT d7
+#define DTRA d6
#else
#define DDAT d6
+#define DTRA d0
#endif
+ /*
+ * Beginning of code
+ */
-#if TRANS_GPIO_VREG == CLOCK_GPIO_VREG
-#define DTRA d7
-#elif TRANS_GPIO_VREG == DATA_GPIO_VREG
-#define DTRA d6
-#elif CLOCK_GPIO_VREG == DATA_GPIO_VREG
-#define DTRA d6
+ .text
+ .org 0
+
+ /*
+ * m68k exception Vectors
+ */
+_vecs:
+ /* Boot vector */
+ .long _stack_top /* Stack below 1M */
+ .long _start /* Start code */
+
+ /* Remaining 254 vectors point to corresponding stubs
+ * starting at 0x10000, 0x10 bytes each
+ */
+ .rept 254
+0: .long _bad_exceptions + (0b - _vecs)
+ .endr
+
+ /*
+ * Header info
+ */
+ .org 0x400
+_header_info:
+ .short SYS_SIG /* 0x00 */
+ .short FW_VERSION /* 0x02 */
+ .byte API_VERSION_MAJ /* 0x04 */
+ .byte API_VERSION_MIN /* 0x05 */
+ .byte 0,0 /* 0x06 pad */
+
+#ifdef ENABLE_TRACE
+#define _FW_OPTION_TRACE_EN FW_OPTION_TRACE_EN
#else
-#define DTRA d1 /* Temp, not a cache */
-#define DTRA_UNCACHED
+#define _FW_OPTION_TRACE_EN 0
#endif
+ .long _FW_OPTION_TRACE_EN | FW_OPTION_CONT_CLOCK /* 0x08 FW options */
+ .long 0 /* 0x0c pad */
+ .long _end - _vecs /* 0x10 FW size */
- /* Tracing macro */
+ /*
+ * Config area
+ */
+ .org 0x400 + HDR_CMD_STAT_AREA
+_cmd_stat_base:
+ .long SRAM_BASE_BE
+_fw_ctrl:
+ .long 0
+
+ .org 0x400 + HDR_CLOCK_GPIO_VADDR
+_clk_gpio_vreg:
+ .short 0x1e0
+_clk_gpio_dreg:
+ .short 0x0d8
+_dat_gpio_vreg:
+ .short 0x1e0
+_dat_gpio_dreg:
+ .short 0x0d8
+_tra_gpio_vreg:
+ .short 0x080
+_tra_gpio_dreg:
+ .short 0x0d0
+_clk_gpio_bit:
+ .byte 16
+_dat_gpio_bit:
+ .byte 18
+_tra_gpio_bit:
+ .byte 10
+
+ /*
+ * Tracing macro
+ */
#ifdef ENABLE_TRACE
.macro trace op:req
@@ -70,11 +135,21 @@
.macro count_clock
addq.l #1,%a1@(CLK_CNT)
.endm
+ .macro count_stop
+ addq.l #1,%a1@(STOP_CNT)
+ .endm
+ .macro count_ints
+ addq.l #1,%a1@(INT_CNT)
+ .endm
#else
.macro trace op:req
.endm
.macro count_clock
.endm
+ .macro count_stop
+ .endm
+ .macro count_ints
+ .endm
#endif
/*
@@ -91,32 +166,32 @@
.endm
/* clock_out_bit reg: Clock out bit 31 of reg */
- .macro clock_out_bit reg:req
+ .macro clock_out_bit reg:req dbit:req
btst.l #31,\reg
beq 98f
- bset.l #DATA_GPIO_BIT,%DDAT
+ bset.l \dbit,%DDAT
trace #TR_CLKOBIT1
bra 99f
-98: bclr.l #DATA_GPIO_BIT,%DDAT
+98: bclr.l \dbit,%DDAT
trace #TR_CLKOBIT0
-99:
- /* If data and clock GPIO share the same register, such as on
+99: /* If data and clock GPIO share the same register, such as on
* Romulus, the write done by clock_toggle will set the new data
* value along with the low clock edge. Thus we don't need to
* set it here, thus saving a PCLK
*/
-#if DATA_GPIO_VREG != CLOCK_GPIO_VREG
+#ifndef DCLK_DDAT_SHARED
move.l %DDAT,%a4@(0)
#endif
clock_toggle
.endm
/* clock_zeros reg: Clock out zeros (GPIO set to 1), assume at least 1 */
- .macro clock_out_zeros reg:req
+ .macro clock_out_zeros reg:req tmp:req
trace #TR_CLKZ
trace \reg
- bset.l #DATA_GPIO_BIT,%DDAT
-#if DATA_GPIO_VREG != CLOCK_GPIO_VREG
+ mvz.b %pc@(_dat_gpio_bit),\tmp
+ bset.l \tmp,%DDAT
+#ifndef DCLK_DDAT_SHARED
move.l %DDAT,%a4@(0)
#endif
99: clock_toggle
@@ -127,81 +202,35 @@
/* clock_in_bit reg: Clocks in bit into bit 0 of reg, the rest is 0
* note: bit 0 of reg must already be cleared
*/
- .macro clock_in_bit reg:req tmp:req tmp2:req
+ .macro clock_in_bit reg:req tmp:req dbit:req idx:req
bclr.l %d5,%DCLK /* clock low */
move.l %DCLK,%a5@(0)
-#ifdef EXTRA_DUMMY_READ
+_clk_in_patch\idx:
move.l %a4@(0),\tmp /* dummy read */
-#endif
move.l %a4@(0),\tmp /* dummy read */
move.l %a4@(0),\tmp /* actual read */
bset.l %d5,%DCLK /* clock high */
move.l %DCLK,%a5@(0)
count_clock
- moveq.l #DATA_GPIO_BIT,\tmp2
- lsr.l \tmp2,\tmp
- moveq.l #1,\tmp2
- and.l \tmp2,\tmp
+ lsr.l \dbit,\tmp
+ andi.l #1,\tmp
or.l \tmp,\reg
#ifdef ENABLE_TRACE
- move.l #TR_CLKIBIT0,\tmp2
- or.l \tmp,\tmp2
- trace \tmp2
+ ori.l #TR_CLKIBIT0,\tmp
+ trace \tmp
#endif
.endm
/*
- * Beginning of code
- */
-
- .text
- .org 0
-
- /*
- * m68k exception Vectors
+ * Macro used when there's no STOP or when doing continuous
+ * clocking: insert some NOPs to relieve the bus.
*/
-_vecs:
- /* Boot vector */
- .long _stack_top /* Stack below 1M */
- .long _start /* Start code */
-
- /* Remaining 254 vectors point to corresponding stubs
- * starting at 0x10000, 0x10 bytes each
- */
- .rept 254
-0: .long _bad_exceptions + (0b - _vecs)
- .endr
-
- /*
- * Header info
- */
- .org 0x400
-_header_info:
- .short SYS_SIG /* 0x00 */
- .short FW_VERSION /* 0x02 */
- .byte API_VERSION_MAJ /* 0x04 */
- .byte API_VERSION_MIN /* 0x05 */
- .byte 0,0 /* 0x06 pad */
-
-#ifdef ENABLE_TRACE
-#define _FW_OPTION_TRACE_EN FW_OPTION_TRACE_EN
-#else
-#define _FW_OPTION_TRACE_EN 0
-#endif
-
-#ifdef CONTINUOUS_CLOCKING
-#define _FW_OPTION_CONT_CLOCK FW_OPTION_CONT_CLOCK
-#else
-#define _FW_OPTION_CONT_CLOCK 0
-#endif
- .long _FW_OPTION_TRACE_EN | _FW_OPTION_CONT_CLOCK /* 0x08 */
-
- /*
- * Config area
- */
- .org 0x400 + HDR_CMD_STAT_AREA
-_cmd_stat_base:
- .long SRAM_BASE_BE
+ .macro pause_nops reg:req
+ moveq.l #32,\reg
+0: nop
+ subq.l #1,\reg
+ bne 0b
+ .endm
/*
* Main entry point
@@ -212,18 +241,22 @@ _start:
/* Get base addresses */
lea %pc@(_cmd_stat_base),%a0
movea.l %a0@(0),%a1
- movea.l #GPIO_BASE,%a4
- movea.l %a4,%a5
- add.l #CLOCK_GPIO_VREG,%a5
- add.l #DATA_GPIO_VREG,%a4
- movea.l #CVIC_BASE,%a2
-
- /* Store some diagnostic stuff */
- move.l %d0,%a1@(CF_RESET_D0)
- move.l %d1,%a1@(CF_RESET_D1)
+
+ /* Coldfire sets D0 and D1 to special configuration values at reset,
+ * we capture them here for diagnostics purposes. This has to be done
+ * before D0 gets reused as a scratch register below, otherwise the
+ * value stored in CF_RESET_D0 is a GPIO register offset instead.
+ */
+ move.l %d0,%a1@(CF_RESET_D0)
+ move.l %d1,%a1@(CF_RESET_D1)
+
+ /* A2 = GPIO block base, A5 = base + clock GPIO value register
+ * offset, A4 = base + data GPIO value register offset. The
+ * offsets come from the config area (D0 is scratch here).
+ */
+ movea.l #GPIO_BASE,%a2
+ movea.l %a2,%a5
+ mvz.w %pc@(_clk_gpio_vreg),%d0
+ add.l %d0,%a5
+ movea.l %a2,%a4
+ mvz.w %pc@(_dat_gpio_vreg),%d0
+ add.l %d0,%a4
/* Cache clock bit number */
- moveq.l #CLOCK_GPIO_BIT,%d5
+ mvz.b %pc@(_clk_gpio_bit),%d5
/*
* Check arbitration register early. There's a case where an arbitration request
@@ -241,6 +274,45 @@ _start:
bne 0b
1: /*
+ * Code patching tricks for performance reasons
+ */
+
+ /* If trans GPIO doesn't share a register with either data
+ * nor clock, we can remove some cache reloads
+ */
+ mvs.w %pc@(_tra_gpio_vreg),%d0
+ mvs.w %pc@(_clk_gpio_vreg),%d1
+ cmp.l %d0,%d1
+ beq 1f
+ mvs.w %pc@(_dat_gpio_vreg),%d1
+ cmp.l %d0,%d1
+ beq 1f
+ lea %pc@(_config_out_load_ret),%a0
+ move.w #0x4e75,%a0@(0)
+ bsr cache_flush
+ lea %pc@(_config_in_load_ret),%a0
+ move.w #0x4e75,%a0@(0)
+ bsr cache_flush
+
+ /* Check if extra dummy read required */
+1: move.l %pc@(_fw_ctrl),%d0
+ moveq.l #FW_CONTROL_DUMMY_RD,%d1
+ and.l %d0,%d1
+ bne 1f
+ lea %pc@(_clk_in_patch0),%a0
+ move.w #0x4e71,%a0@(0)
+ bsr cache_flush
+ lea %pc@(_clk_in_patch1),%a0
+ move.w #0x4e71,%a0@(0)
+ bsr cache_flush
+ lea %pc@(_clk_in_patch2),%a0
+ move.w #0x4e71,%a0@(0)
+ bsr cache_flush
+ lea %pc@(_clk_in_patch3),%a0
+ move.w #0x4e71,%a0@(0)
+ bsr cache_flush
+
+1: /*
* Load GPIO values into caches and set initial values
*
* Note: We load from the "Data Read" register which
@@ -249,35 +321,45 @@ _start:
* the value sampled on the line. The reason is that
* the value can be missing recent changes due to
* being behind synchronizers.
- */
- move.l %a5@(CLOCK_GPIO_DREG-CLOCK_GPIO_VREG),%DCLK
- move.l %a4@(DATA_GPIO_DREG-DATA_GPIO_VREG),%DDAT
- move.l %a5@(TRANS_GPIO_DREG-CLOCK_GPIO_VREG),%DTRA
- bset.l #CLOCK_GPIO_BIT,%DCLK
- bset.l #DATA_GPIO_BIT,%DDAT
- bset.l #TRANS_GPIO_BIT,%DTRA
+ *
+ * Since the trans GPIO may overlap the data or clock one,
+ * and we don't always keep a cache of it, we set it up first before
+ * we load the caches.
+ */
+
+ /* Setup TRANS GPIO */
+ mvs.w %pc@(_tra_gpio_dreg),%d3
+ mvs.b %pc@(_tra_gpio_bit),%d1
+ move.l %a2@(%d3),%d0
+ mvs.w %pc@(_tra_gpio_vreg),%d3
+ bset.l %d1,%d0
+ move.l %d0,%a2@(%d3)
+ addq.l #4,%d3
+ move.l %a2@(%d3),%d0
+ bset.l %d1,%d0
+ move.l %d0,%a2@(%d3)
+
+ /* Load CLK and DAT GPIO caches */
+ bsr load_gpio_caches
+
+ /* Set initial CLK and DAT GPIOs */
+ bset.l %d5,%DCLK
+ mvs.b %pc@(_dat_gpio_bit),%d1
+ bset.l %d1,%DDAT
move.l %DCLK,%a5@(0)
move.l %DDAT,%a4@(0)
- move.l %DTRA,%a5@(TRANS_GPIO_VREG-CLOCK_GPIO_VREG)
- /* Configure all GPIOs as output */
+ /* Configure CLK and DAT as output */
move.l %a5@(4),%d0
- bset.l #CLOCK_GPIO_BIT,%d0
+ bset.l %d5,%d0
move.l %d0,%a5@(4)
move.l %a4@(4),%d0
- bset.l #DATA_GPIO_BIT,%d0
+ bset.l %d1,%d0
move.l %d0,%a4@(4)
- move.l %a5@(TRANS_GPIO_VREG-CLOCK_GPIO_VREG+4),%d0
- bset.l #TRANS_GPIO_BIT,%d0
- move.l %d0,%a5@(TRANS_GPIO_VREG-CLOCK_GPIO_VREG+4)
/* Initialize A6 to point to command area */
lea %a1@(CMD_DATA),%a6
- /* Clear interrupt count */
- moveq.l #0,%d0
- move.l %d0,%a1@(INT_CNT)
-
/* Install external interrupt vector */
lea %pc@(_int),%a0
move.l %a0,(0x46*4)
@@ -295,25 +377,43 @@ _start:
* Main command loop
*/
main_loop:
+ /* Load control bits */
+ move.l %pc@(_fw_ctrl),%d0
+
+wait_cmd:
/* Reset trace pointer */
#ifdef ENABLE_TRACE
lea %a1@(TRACEBUF),%a3
#endif
/* Wait arbitration request or command */
-1: move.b %a1@(ARB_REG),%d2
+ move.b %a1@(ARB_REG),%d2
bne arbitration_request
move.l %a1@(CMD_STAT_REG),%d2
tst.b %d2
bne command_request
-#ifdef CONTINUOUS_CLOCKING
+
+ /* Are we doing continuous clocking ? */
+ moveq.l #FW_CONTROL_CONT_CLOCK,%d1
+ and.l %d0,%d1
+ beq 1f
clock_toggle
-#else
-#ifndef NO_SW_IRQ
+ pause_nops %d1
+ bra wait_cmd
+
+ /* Can we use STOP instructions ? */
+1: moveq.l #FW_CONTROL_USE_STOP,%d1
+ and.l %d0,%d1
+ beq 1f
+ count_stop
stop #0x2000
move.w #0x2007,%sr
-#endif
-#endif
- bra 1b
+ bra wait_cmd
+
+ /* Neither continuous clocks nor STOP, use some nops
+ * and go back
+ */
+1: pause_nops %d1
+ bra wait_cmd
arbitration_request:
/* Ack request */
@@ -324,29 +424,27 @@ arb_wait:
move.b %a1@(ARB_REG),%d1
beq arb_done
-#ifdef NO_SW_IRQ
- /* In absence of SW irq, just loop with some NOPs to avoid
- * hammering the bus too hard
- */
- moveq.l #32,%d0
-0: nop
- subq.l #1,%d0
- bne 0b
- bra arb_wait
-#else
+ /* Control bits still in %d0 */
+ moveq.l #FW_CONTROL_USE_STOP,%d1
+ and.l %d0,%d1
+ beq 1f
+
/* Wait, we'll get an interrupt when the host clears it */
+ count_stop
stop #0x2000
move.w #0x2007,%sr
bra arb_wait
-#endif
+
+1: /* In absence of SW irq, just loop with some NOPs to avoid
+ * hammering the bus too hard
+ */
+ pause_nops %d1
+ bra arb_wait
arb_done:
/* Got it, re-load the GPIO caches */
- move.l %a5@(0),%DCLK
- move.l %a4@(0),%DDAT
-#ifndef DTRA_UNCACHED
- move.l %a5@(TRANS_GPIO_DREG-CLOCK_GPIO_VREG),%DTRA
-#endif
+ bsr load_gpio_caches
+
/* Reconfigure data as output just in case ... */
bsr config_gpio_out
@@ -378,10 +476,16 @@ command_request:
/*
* Process a command
*/
+
+ /* This seems to help performance, probably cache alignment/aliasing
+ * of some loops. Rather random but heh...
+ */
+ .balign 0x10
start_command:
/* Start bit */
+ mvz.b %pc@(_dat_gpio_bit),%d1
moveq.l #0,%d0
- clock_out_bit %d0
+ clock_out_bit %d0,%d1
trace #TR_CLKOSTART
/* Load first lword and invert it */
@@ -404,7 +508,7 @@ start_command:
/* Clock out 32 bits */
sub.l %d3,%d2
-0: clock_out_bit %d4
+0: clock_out_bit %d4,%d1
lsl.l #1,%d4
subq.l #1,%d3
bne 0b
@@ -419,14 +523,14 @@ start_command:
beq 2f
trace #TR_OLEN
trace %d3
-0: clock_out_bit %d4
+0: clock_out_bit %d4,%d1
lsl.l #1,%d4
subq.l #1,%d3
bne 0b
2: /* Done sending, ready to receive, first echo delay */
move.b %a1@(ECHO_DLY_REG),%d3 /* d3 is already 0 */
- clock_out_zeros %d3
+ clock_out_zeros %d3,%d0
/* Set GPIO and transceivers to input */
bsr config_gpio_in
@@ -434,8 +538,9 @@ start_command:
/* Wait for start bit */
move.l #1000,%d3
trace #TR_CLKWSTART
+ mvz.b %pc@(_dat_gpio_bit),%d1
0: moveq #0,%d4
- clock_in_bit %d4,%d0,%d1
+ clock_in_bit %d4,%d0,%d1,0
/* We read inverted value, so wait for a "0" */
btst #0,%d4
beq 1f
@@ -450,7 +555,7 @@ start_command:
moveq #4,%d3
moveq #0,%d4
0: lsl.l #1,%d4
- clock_in_bit %d4,%d0,%d1
+ clock_in_bit %d4,%d0,%d1,1
subq.l #1,%d3
bne 0b
@@ -482,7 +587,7 @@ start_command:
move.l %d2,%d3
moveq.l #0,%d4
0: lsl.l #1,%d4
- clock_in_bit %d4,%d0,%d1
+ clock_in_bit %d4,%d0,%d1,2
subq.l #1,%d3
bne 0b
@@ -495,7 +600,7 @@ start_command:
moveq.l #4,%d3
moveq.l #0,%d4
0: lsl.l #1,%d4
- clock_in_bit %d4,%d0,%d1
+ clock_in_bit %d4,%d0,%d1,3
subq.l #1,%d3
bne 0b
@@ -518,7 +623,7 @@ send_delay:
/* Send delay after every command */
moveq.l #0,%d3
move.b %a1@(SEND_DLY_REG),%d3
- clock_out_zeros %d3
+ clock_out_zeros %d3,%d0
/* Configure GPIOs to output */
bsr config_gpio_out
@@ -529,11 +634,12 @@ send_delay:
start_break:
/* Clock some 1's to pace and flush out whatever's going on */
move.l #FSI_PRE_BREAK_CLOCKS,%d3
- clock_out_zeros %d3
+ clock_out_zeros %d3,%d0
/* Clock out the break */
+ mvz.b %pc@(_dat_gpio_bit),%d1
moveq.l #0,%d0
- clock_out_bit %d0
+ clock_out_bit %d0,%d1
move.l #(FSI_BREAK_CLOCKS-1),%d3
0: clock_toggle
subq.l #1,%d3
@@ -541,7 +647,7 @@ start_break:
/* Clock some more 1's to resync (includes the send delay) */
move.l #FSI_POST_BREAK_CLOCKS,%d3
- clock_out_zeros %d3
+ clock_out_zeros %d3,%d0
/* End trace */
trace #TR_END
@@ -560,7 +666,7 @@ start_idle_clocks:
/* Clock them out */
moveq #0,%d3
move.b %d2,%d3
- clock_out_zeros %d3
+ clock_out_zeros %d3,%d0
/* End trace */
trace #TR_END
@@ -574,32 +680,69 @@ start_idle_clocks:
config_gpio_out:
/* Configure data GPIO as output, value 1 (idle) */
- bset.l #DATA_GPIO_BIT,%DDAT
+ mvs.b %pc@(_dat_gpio_bit),%d1
+ bset.l %d1,%DDAT
move.l %DDAT,%a4@(0)
move.l %a4@(4),%d0
- bset.l #DATA_GPIO_BIT,%d0
+ bset.l %d1,%d0
move.l %d0,%a4@(4)
/* Set transceivers to output */
-#ifdef DTRA_UNCACHED
- move.l %a5@(TRANS_GPIO_DREG-CLOCK_GPIO_VREG),%DTRA
+ mvs.b %pc@(_tra_gpio_bit),%d1
+#ifndef DCLK_DDAT_SHARED
+ mvs.w %pc@(_tra_gpio_dreg),%d3
+ move.l %a2@(%d3),%DTRA
#endif
- bset.l #TRANS_GPIO_BIT,%DTRA
- move.l %DTRA,%a5@(TRANS_GPIO_VREG-CLOCK_GPIO_VREG)
- rts
+ mvs.w %pc@(_tra_gpio_vreg),%d3
+ bset.l %d1,%DTRA
+ move.l %DTRA,%a2@(%d3)
-config_gpio_in:
- /* Set transceiver to input */
-#ifdef DTRA_UNCACHED
- move.l %a5@(TRANS_GPIO_DREG-CLOCK_GPIO_VREG),%DTRA
-#endif
- bclr.l #TRANS_GPIO_BIT,%DTRA
- move.l %DTRA,%a5@(TRANS_GPIO_VREG-CLOCK_GPIO_VREG)
+ /* Reload caches in case of collision & return
+ *
+ * This can be patched out if unnecessary
+ */
+_config_out_load_ret:
+ bra load_gpio_caches
+config_gpio_in:
/* Configure data GPIO as input */
+ mvs.b %pc@(_dat_gpio_bit),%d1
move.l %a4@(4),%d0
- bclr.l #DATA_GPIO_BIT,%d0
+ bclr.l %d1,%d0
move.l %d0,%a4@(4)
+
+ /* Set transceiver to input */
+ mvs.b %pc@(_tra_gpio_bit),%d1
+#ifndef DCLK_DDAT_SHARED
+ mvs.w %pc@(_tra_gpio_dreg),%d3
+ move.l %a2@(%d3),%DTRA
+#endif
+ mvs.w %pc@(_tra_gpio_vreg),%d3
+ bclr.l %d1,%DTRA
+ move.l %DTRA,%a2@(%d3)
+
+ /* Reload caches in case of collision & return */
+_config_in_load_ret:
+ bra load_gpio_caches
+
+load_gpio_caches:
+ mvs.w %pc@(_clk_gpio_dreg),%d0
+ move.l %a2@(%d0),%DCLK
+#ifdef DCLK_DDAT_SHARED
+ mvs.w %pc@(_tra_gpio_dreg),%d0
+ move.l %a2@(%d0),%DTRA
+#else
+ mvs.w %pc@(_dat_gpio_dreg),%d0
+ move.l %a2@(%d0),%DDAT
+#endif
+ rts
+
+ /* Flush D and I cache for address %a0 */
+cache_flush:
+ move.l %a0,DFLUSH_BASE
+ nop
+ move.l %a0,IFLUSH_BASE
+ nop
rts
/* Interrupt handler
@@ -608,22 +751,29 @@ config_gpio_in:
*/
_int:
+ /* Save the two registers used as scratch by the handler:
+ * D0 is pushed first, A0 second (A0 ends up on top of stack).
+ */
move.l %d0,%a7@-
- addq.l #1,%a1@(INT_CNT)
+ move.l %a0,%a7@-
+ count_ints
+ /* Write the SW IRQ bit to the CVIC clear register, then read
+ * the same register back.
+ */
moveq.l #CVIC_SW_IRQ,%d0
- move.l %d0,%a2@(CVIC_SW_IRQ_CLR)
- move.l %a2@(CVIC_SW_IRQ_CLR),%d0
+ movea.l #CVIC_BASE,%a0
+ move.l %d0,%a0@(CVIC_SW_IRQ_CLR)
+ move.l %a0@(CVIC_SW_IRQ_CLR),%d0
+ /* Restore in reverse push order: A0 first, then D0. Popping
+ * them the other way round would swap the two registers and
+ * corrupt the FW control bits the main loop keeps in D0.
+ */
+ movea.l %a7@+,%a0
move.l %a7@+,%d0
rte
/* Bad exception handler */
bad_exception:
move.l %a7@+,%d0
- move.l _bad_exceptions + 4,%d1
+ move.l #(_bad_exceptions + 4),%d1
sub.l %d1,%d0
lsr.l #2,%d0
- move.b %d0,%a1@(BAD_INT_VEC)
+ move.l %d0,%a1@(BAD_INT_VEC)
+ move.l %a7@+,%d0
+ move.l %d0,%a1@(BAD_INT_S0)
+ move.l %a7@+,%d0
+ move.l %d0,%a1@(BAD_INT_S1)
move.b #STAT_ERR_INVAL_IRQ,%a1@(CMD_STAT_REG)
- halt
+ bra .
/* Bad exception stubs */
_bad_exceptions:
@@ -635,3 +785,6 @@ _bad_exceptions:
.balign 0x10
.space STACK_SIZE
_stack_top:
+ /* Dummy padding */
+ .space 0x10
+_end:
diff --git a/cf-code/cf-fsi-palmetto.h b/cf-code/cf-fsi-palmetto.h
deleted file mode 100644
index c11b116..0000000
--- a/cf-code/cf-fsi-palmetto.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-#define CLOCK_GPIO_VREG 0x000
-#define CLOCK_GPIO_DREG 0x0c0
-#define CLOCK_GPIO_BIT 4
-#define DATA_GPIO_VREG 0x000
-#define DATA_GPIO_DREG 0x0c0
-#define DATA_GPIO_BIT 5
-#define TRANS_GPIO_VREG 0x020
-#define TRANS_GPIO_DREG 0x0c4
-#define TRANS_GPIO_BIT 30
-
-#define SYS_SIG SYS_SIG_PALMETTO
-
-#define NO_SW_IRQ
-#define CONTINUOUS_CLOCKING
-#define EXTRA_DUMMY_READ
diff --git a/cf-code/cf-fsi-romulus.h b/cf-code/cf-fsi-romulus.h
deleted file mode 100644
index 3ffdc67..0000000
--- a/cf-code/cf-fsi-romulus.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-#define CLOCK_GPIO_VREG 0x1e0
-#define CLOCK_GPIO_DREG 0x0d8
-#define CLOCK_GPIO_BIT 16
-#define DATA_GPIO_VREG 0x1e0
-#define DATA_GPIO_DREG 0x0d8
-#define DATA_GPIO_BIT 18
-#define TRANS_GPIO_VREG 0x080
-#define TRANS_GPIO_DREG 0x0d0
-#define TRANS_GPIO_BIT 10
-
-#define SYS_SIG SYS_SIG_ROMULUS
diff --git a/cf-code/cf-fsi-shared.h b/cf-code/cf-fsi-shared.h
new file mode 100644
index 0000000..95ab549
--- /dev/null
+++ b/cf-code/cf-fsi-shared.h
@@ -0,0 +1,3 @@
+#define SYS_SIG SYS_SIG_SHARED
+
+#define DCLK_DDAT_SHARED
diff --git a/cf-code/cf-fsi-split.h b/cf-code/cf-fsi-split.h
new file mode 100644
index 0000000..70806bc
--- /dev/null
+++ b/cf-code/cf-fsi-split.h
@@ -0,0 +1 @@
+#define SYS_SIG SYS_SIG_SPLIT
diff --git a/cf-code/cf-fsi-witherspoon.h b/cf-code/cf-fsi-witherspoon.h
deleted file mode 100644
index 87507e5..0000000
--- a/cf-code/cf-fsi-witherspoon.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-#define CLOCK_GPIO_VREG 0x1e0
-#define CLOCK_GPIO_DREG 0x0d8
-#define CLOCK_GPIO_BIT 16
-#define DATA_GPIO_VREG 0x020
-#define DATA_GPIO_DREG 0x0c4
-#define DATA_GPIO_BIT 0
-#define TRANS_GPIO_VREG 0x080
-#define TRANS_GPIO_DREG 0x0d0
-#define TRANS_GPIO_BIT 10
-
-#define SYS_SIG SYS_SIG_WITHERSPOON
diff --git a/cf-fsi-fw.h b/cf-fsi-fw.h
index 5d80bf5..712df04 100644
--- a/cf-fsi-fw.h
+++ b/cf-fsi-fw.h
@@ -25,19 +25,32 @@
/* Info: Signature & version */
#define HDR_SYS_SIG 0x00 /* 2 bytes system signature */
-#define SYS_SIG_ROMULUS 0x526d /* 'Rm' */
-#define SYS_SIG_WITHERSPOON 0x5773 /* 'Ws' */
-#define SYS_SIG_PALMETTO 0x5061 /* 'Pa' */
+#define SYS_SIG_SHARED 0x5348
+#define SYS_SIG_SPLIT 0x5350
#define HDR_FW_VERS 0x02 /* 2 bytes Major.Minor */
#define HDR_API_VERS 0x04 /* 2 bytes Major.Minor */
-#define API_VERSION_MAJ 1 /* Current version */
+#define API_VERSION_MAJ 2 /* Current version */
#define API_VERSION_MIN 1
#define HDR_FW_OPTIONS 0x08 /* 4 bytes option flags */
#define FW_OPTION_TRACE_EN 0x00000001 /* FW tracing enabled */
-#define FW_OPTION_CONT_CLOCK 0x00000002 /* Continuous clocking */
+#define FW_OPTION_CONT_CLOCK 0x00000002 /* Continuous clocking supported */
+#define HDR_FW_SIZE 0x10 /* 4 bytes size for combo image */
/* Boot Config: Address of Command/Status area */
#define HDR_CMD_STAT_AREA 0x80 /* 4 bytes CF address */
+#define HDR_FW_CONTROL 0x84 /* 4 bytes control flags */
+#define FW_CONTROL_CONT_CLOCK 0x00000002 /* Continuous clocking enabled */
+#define FW_CONTROL_DUMMY_RD 0x00000004 /* Extra dummy read (AST2400) */
+#define FW_CONTROL_USE_STOP 0x00000008 /* Use STOP instructions */
+#define HDR_CLOCK_GPIO_VADDR 0x90 /* 2 bytes offset from GPIO base */
+#define HDR_CLOCK_GPIO_DADDR 0x92 /* 2 bytes offset from GPIO base */
+#define HDR_DATA_GPIO_VADDR 0x94 /* 2 bytes offset from GPIO base */
+#define HDR_DATA_GPIO_DADDR 0x96 /* 2 bytes offset from GPIO base */
+#define HDR_TRANS_GPIO_VADDR 0x98 /* 2 bytes offset from GPIO base */
+#define HDR_TRANS_GPIO_DADDR 0x9a /* 2 bytes offset from GPIO base */
+#define HDR_CLOCK_GPIO_BIT 0x9c /* 1 byte bit number */
+#define HDR_DATA_GPIO_BIT 0x9d /* 1 byte bit number */
+#define HDR_TRANS_GPIO_BIT 0x9e /* 1 byte bit number */
/*
* Command/Status area layout: Main part
@@ -117,6 +130,11 @@
/* Misc2 */
#define CF_RESET_D0 0x50
#define CF_RESET_D1 0x54
+#define BAD_INT_S0 0x58
+#define BAD_INT_S1 0x5c
+#define STOP_CNT 0x60
+
+/* Internal */
/*
* SRAM layout: Trace buffer (debug builds only)
diff --git a/cf-fsi-test.c b/cf-fsi-test.c
index a9d650e..b800ff2 100644
--- a/cf-fsi-test.c
+++ b/cf-fsi-test.c
@@ -293,16 +293,79 @@ static void start_cf(void)
writel(SCU_COPRO_CLK_EN, sysreg + SCU_COPRO_CTRL);
}
-static void load_cf_code(void)
+#ifdef ROMULUS
+#define WANTED_SIG SYS_SIG_SHARED
+static void setup_cf_config(void)
+{
+ void *base = cfmem + HDR_OFFSET;
+
+ writew(htons(0x01e0), base + HDR_CLOCK_GPIO_VADDR);
+ writew(htons(0x00d8), base + HDR_CLOCK_GPIO_DADDR);
+ writew(htons(0x01e0), base + HDR_DATA_GPIO_VADDR);
+ writew(htons(0x00d8), base + HDR_DATA_GPIO_DADDR);
+ writew(htons(0x0080), base + HDR_TRANS_GPIO_VADDR);
+ writew(htons(0x00d0), base + HDR_TRANS_GPIO_DADDR);
+ writeb(16, base + HDR_CLOCK_GPIO_BIT);
+ writeb(18, base + HDR_DATA_GPIO_BIT);
+ writeb(10, base + HDR_TRANS_GPIO_BIT);
+ writel(htonl(FW_CONTROL_USE_STOP), base + HDR_FW_CONTROL);
+}
+#endif
+
+#ifdef PALMETTO
+#define WANTED_SIG SYS_SIG_SHARED
+static void setup_cf_config(void)
+{
+ void *base = cfmem + HDR_OFFSET;
+
+ writew(htons(0x0000), base + HDR_CLOCK_GPIO_VADDR);
+ writew(htons(0x00c0), base + HDR_CLOCK_GPIO_DADDR);
+ writew(htons(0x0000), base + HDR_DATA_GPIO_VADDR);
+ writew(htons(0x00c0), base + HDR_DATA_GPIO_DADDR);
+ writew(htons(0x0020), base + HDR_TRANS_GPIO_VADDR);
+ writew(htons(0x00c4), base + HDR_TRANS_GPIO_DADDR);
+ writeb(4, base + HDR_CLOCK_GPIO_BIT);
+ writeb(5, base + HDR_DATA_GPIO_BIT);
+ writeb(30, base + HDR_TRANS_GPIO_BIT);
+ writel(htonl(FW_CONTROL_CONT_CLOCK|FW_CONTROL_DUMMY_RD), base + HDR_FW_CONTROL);
+}
+#endif
+
+/*
+ * Walk the concatenated firmware images between cf_code_start and
+ * cf_code_end and return a pointer to the first one whose header
+ * signature equals want_sig, storing its size (taken from the
+ * HDR_FW_SIZE header field) in *out_size.
+ *
+ * Returns NULL if no image matches or if a malformed image (zero size,
+ * or size running past the end of the blob) is encountered.
+ */
+static uint8_t *find_cf_code(uint16_t want_sig, size_t *out_size)
{
extern uint8_t cf_code_start, cf_code_end;
+	uint8_t *start = &cf_code_start;
+	uint8_t *end = &cf_code_end;
+	size_t size, left;
+	uint16_t sig;
+
+	while (start < end) {
+		left = (size_t)(end - start);
+
+		/* The header fields we read must lie inside the blob */
+		if (left < HDR_OFFSET + HDR_FW_SIZE + sizeof(uint32_t))
+			break;
+		sig = ntohs(*(uint16_t *)(start + HDR_OFFSET + HDR_SYS_SIG));
+		size = ntohl(*(uint32_t *)(start + HDR_OFFSET + HDR_FW_SIZE));
+
+		/*
+		 * A zero size would loop forever, an oversized one would
+		 * make the caller copy past the end of the blob.
+		 */
+		if (size == 0 || size > left)
+			break;
+		if (sig == want_sig) {
+			*out_size = size;
+			return start;
+		}
+		start += size;
+	}
+	return NULL;
+}
+
+static void load_cf_code(void)
+{
uint16_t sig, fw_vers, api_vers;
uint32_t fw_options;
-
- uint8_t *code = &cf_code_start;
+ uint8_t *code, *end;
+ size_t size;
uint8_t *mem = cfmem;
- while(code < &cf_code_end)
+ code = find_cf_code(WANTED_SIG, &size);
+ if (!code) {
+ printf("Can't find code signature %04x\n", WANTED_SIG);
+ exit(1);
+ }
+ end = code + size;
+ while(code < end)
writeb(*(code++), mem++);
sig = ntohs(readw(cfmem + HDR_OFFSET + HDR_SYS_SIG));
@@ -316,6 +379,7 @@ static void load_cf_code(void)
sig, fw_vers, api_vers >> 8, api_vers & 0xff,
trace_enabled ? "enabled" : "disabled");
+ setup_cf_config();
}
#ifdef ROMULUS
@@ -690,12 +754,20 @@ static void dump_stuff(void)
{
int i;
- printf("CMD:%08x RTAG=%02x RCRC=%02x RDATA=%02x #INT=%08x\n",
+ printf("CMD:%08x RTAG=%02x RCRC=%02x RDATA=%02x BAD_INT=%08x (%08x %08x)\n",
ntohl(readl(sysreg + SRAM_BASE + CMD_STAT_REG)),
readb(sysreg + SRAM_BASE + STAT_RTAG),
readb(sysreg + SRAM_BASE + STAT_RCRC),
ntohl(readl(sysreg + SRAM_BASE + RSP_DATA)),
- ntohl(readl(sysreg + SRAM_BASE + INT_CNT)));
+ ntohl(readl(sysreg + SRAM_BASE + BAD_INT_VEC)),
+ ntohl(readl(sysreg + SRAM_BASE + BAD_INT_S0)),
+ ntohl(readl(sysreg + SRAM_BASE + BAD_INT_S1)));
+ if (trace_enabled) {
+ printf("#INT=%08x #CLK=%08x #STOP=%08x\n",
+ ntohl(readl(sysreg + SRAM_BASE + INT_CNT)),
+ ntohl(readl(sysreg + SRAM_BASE + CLK_CNT)),
+ ntohl(readl(sysreg + SRAM_BASE + STOP_CNT)));
+ }
for (i = 0; trace_enabled && i < 128; i++) {
uint8_t v = readb(sysreg + SRAM_BASE + TRACEBUF + i);
@@ -711,12 +783,21 @@ static void dump_stuff(void)
static int do_command(uint32_t op)
{
- uint32_t timeout = 100000;
+ uint32_t timeout = 1000000;
uint8_t stat;
+ /* Clear trace */
+ if (trace_enabled) {
+ memset(sysreg + SRAM_BASE + TRACEBUF, 0x00, 128);
+ (void)readl(sysreg + SRAM_BASE + CMD_STAT_REG);
+ }
+
/* Send command */
writel(htonl(op), sysreg + SRAM_BASE + CMD_STAT_REG);
+ /* Read back to avoid ordering issue */
+ (void)readl(sysreg + SRAM_BASE + CMD_STAT_REG);
+
/* Ring doorbell */
writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG);
@@ -733,6 +814,7 @@ static int do_command(uint32_t op)
if (stat == STAT_COMPLETE)
return 0;
+ dump_stuff();
switch(stat) {
case STAT_ERR_INVAL_CMD:
return -EINVAL;
@@ -879,7 +961,7 @@ void bench(void)
int main(int argc, char *argv[])
{
- uint32_t val;
+ uint32_t val, timeout;
open_mem();
@@ -906,8 +988,14 @@ int main(int argc, char *argv[])
start_cf();
/* Wait for status register to say command complete */
+	/* Poll CF_STARTED every 10us, give up after 10000 polls */
+	timeout = 10000;
do {
+		if (!--timeout) {
+			printf("Startup failed !\n");
+			dump_stuff();
+			/* Bail out: without this the loop spins forever
+			 * and the counter wraps around
+			 */
+			exit(1);
+		}
val = readl(sysreg + SRAM_BASE + CF_STARTED);
+		usleep(10);
} while (val == 0x00);
/* Configure echo & send delay */
@@ -924,6 +1012,8 @@ int main(int argc, char *argv[])
/* Let it run for a bit */
sleep(1);
#endif
+ dump_stuff();
+
/* Send break */
test_break();
diff --git a/dist-bin/cf-fsi-palmetto.bin b/dist-bin/cf-fsi-palmetto.bin
deleted file mode 100755
index 2cefe77..0000000
--- a/dist-bin/cf-fsi-palmetto.bin
+++ /dev/null
Binary files differ
diff --git a/dist-bin/cf-fsi-romulus.bin b/dist-bin/cf-fsi-romulus.bin
deleted file mode 100755
index 7aad823..0000000
--- a/dist-bin/cf-fsi-romulus.bin
+++ /dev/null
Binary files differ
diff --git a/dist-bin/cf-fsi-witherspoon.bin b/dist-bin/cf-fsi-witherspoon.bin
deleted file mode 100755
index f81870a..0000000
--- a/dist-bin/cf-fsi-witherspoon.bin
+++ /dev/null
Binary files differ
OpenPOWER on IntegriCloud