summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile27
-rw-r--r--cf-code.s438
-rw-r--r--cf-fsi-test.c664
-rw-r--r--cf-wrapper.S9
4 files changed, 1138 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a75f0a1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Build the ColdFire FSI firmware and the host-side test tool.
+#
+# cf-code.s is assembled and linked at address 0 with a ColdFire binutils
+# toolchain, flattened to a raw binary, and embedded into the host program
+# through cf-wrapper.S (.incbin "cf-code.bin").
+
+# Prefix of the ColdFire binutils (as/ld/objcopy); override on the command line
+M68KCROSS ?= /opt/cross/binutils-coldfire/bin/m68k-unknown-elf-
+M68KAS=$(M68KCROSS)as
+M68KLD=$(M68KCROSS)ld
+M68KOC=$(M68KCROSS)objcopy
+
+# Host compiler; CROSS_COMPILE selects the toolchain for the test tool
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall -Os
+
+# all and clean do not name files: never let a stray file called "all" or
+# "clean" make them look up to date
+.PHONY: all clean
+
+all: cf-fsi-test
+
+cf-code.o : cf-code.s
+ $(M68KAS) -march=isac $^ -o $@
+
+# Link with .text at 0 so the vector table sits at the start of the image
+cf-code.elf : cf-code.o
+ $(M68KLD) -Ttext 0 $^ -o $@
+
+cf-code.bin : cf-code.elf
+ $(M68KOC) -O binary $^ $@
+
+# The wrapper pulls in cf-code.bin via .incbin, hence the extra prerequisite
+cf-wrapper.o : cf-wrapper.S cf-code.bin
+ $(CC) $(CFLAGS) -c cf-wrapper.S -o $@
+
+cf-fsi-test : cf-fsi-test.o cf-wrapper.o
+ $(CC) $(CFLAGS) $^ -o $@
+
+clean:
+ rm -rf cf-fsi-test *.o *.elf *.bin
diff --git a/cf-code.s b/cf-code.s
new file mode 100644
index 0000000..725ddd3
--- /dev/null
+++ b/cf-code.s
@@ -0,0 +1,438 @@
+ .text
+
+ /* Set TRACE to 1 to make the "trace" macro log bytes through %a3 */
+ .equ TRACE, 0
+_vecs:
+ /* Vectors */
+ .org 0
+
+ /* Boot vector */
+ .long 0x0ffff0 /* Stack below 1M */
+ .long 0x400 /* Start code */
+
+ /*
+ * Remaining 254 vectors point to corresponding stubs
+ * starting at 0x10000, 0x10 bytes each
+ *
+ * (0b - _vecs) is the byte offset of the entry being emitted, i.e.
+ * 4 * vector number, so multiplying it by 4 gives 0x10 * vector number.
+ */
+ .rept 254
+0: .long 0x10000 + (0b - _vecs) * 4
+ .endr
+
+ /* Main entry: the second boot vector above points here (0x400) */
+ .org 0x400
+ .global _start
+_start:
+ /*
+ * Base addresses as seen by this code. The _BE/_LE suffixes name two
+ * views of the same SRAM; only SRAM_BASE_BE is used below.
+ */
+ .equ SRAM_BASE_BE, 0x320000
+ .equ SRAM_BASE_LE, 0x720000
+ .equ GPIO_BASE, 0x780000
+ .equ CVIC_BASE, 0x6c2000
+
+ /* CVIC register offsets and the software interrupt bit acked by _int */
+ .equ CVIC_STATUS, 0x00
+ .equ CVIC_SW_IRQ_CLR, 0x1c
+ .equ CVIC_SW_IRQ, 0x2
+
+ /* XXX Romulus specific definitions */
+ /* Offsets from GPIO_BASE, and bit numbers within those registers */
+ .equ GPIO_YZAAAB_DATA, 0x1e0
+ .equ GPIO_YZAAAB_DIR, 0x1e4
+ .equ GPIO_CLOCK_BIT, 16
+ .equ GPIO_DATA_BIT, 18
+ .equ GPIO_QRST_DATA, 0x80
+ .equ GPIO_QRST_TRANS_BIT, 10
+
+ /**** SRAM layout ****/
+
+ /* Command register:
+ *
+ * +---------------------------+
+ * | rsvd | RLEN | CLEN | CMD |
+ * | 8 | 8 | 8 | 8 |
+ * +---------------------------+
+ * | | |
+ * Response len | |
+ * (in bits) | |
+ * | |
+ * Command len |
+ * (in bits) |
+ * |
+ * Command code
+ */
+ .equ CMD_REG, 0x00
+ .equ CMD_NONE, 0x00
+ .equ CMD_COMMAND, 0x01
+ .equ CMD_BREAK, 0x02
+
+ /* Status register
+ *
+ * Single byte written by this code: STAT_SENDING when a command is
+ * picked up, then STAT_COMPLETE or one of the STAT_ERR_* codes.
+ * STAT_STOPPED is never written here.
+ */
+ .equ STAT_REG, 0x04 /* Status */
+ .equ STAT_STOPPED, 0x00
+ .equ STAT_SENDING, 0x01
+ .equ STAT_COMPLETE, 0x02
+ .equ STAT_ERR_INVAL_CMD, 0x80
+ .equ STAT_ERR_INVAL_IRQ, 0x81
+ .equ STAT_ERR_MTOE, 0x82
+
+ /* Response tag: the 4 bits clocked in right after the start bit */
+ .equ STAT_RTAG, 0x05
+
+ /* Response CRC: the last 4 bits clocked in */
+ .equ STAT_RCRC, 0x06
+
+ /* Command data area
+ *
+ * Last byte of message must be left aligned
+ */
+ .equ CMD_DATA, 0x10 /* 64 bit of data */
+
+ /* Response data area, right aligned, unused top bits are 1 */
+ .equ RSP_DATA, 0x20 /* 32 bit of data */
+
+ /* Misc */
+ .equ INT_CNT, 0x30 /* 32-bit interrupt count */
+ .equ BAD_INT_VEC, 0x34
+ /* Trace buffer and the one-byte event codes stored in it when TRACE == 1 */
+ .equ TRACEBUF, 0x40
+ .equ TR_CLKOSTART, 0x00
+ .equ TR_OLEN, 0x01/* + len */
+ .equ TR_CLKOBIT0, 0x02
+ .equ TR_CLKOBIT1, 0x03
+ .equ TR_CLKZ, 0x04 /* + count */
+ .equ TR_CLKWSTART, 0x05
+ .equ TR_CLKTAG, 0x06
+ .equ TR_CLKDATA, 0x07 /* + len */
+ .equ TR_CLKCRC, 0x08 /* + raw crc */
+ .equ TR_CLKIBIT0, 0x80
+ .equ TR_CLKIBIT1, 0x81
+
+ /* Useful macros */
+
+ /*
+ * trace op: when TRACE == 1, store the byte "op" at %a3 and advance
+ * %a3 by one; otherwise expand to nothing. No bound check is done on
+ * %a3, which main_loop resets to TRACEBUF for every command.
+ */
+ .if TRACE == 1
+ .macro trace op:req
+ move.b \op,%a3@+
+ .endm
+ .else
+ .macro trace op:req
+ .endm
+ .endif
+
+ /* clock_toggle: toggle the clock down and back up
+ *
+ * Updates the cached GPIO value in %d7 and writes it to the data
+ * register at %a1 twice; the clock bit is left set in %d7.
+ */
+ .macro clock_toggle
+ bclr.l #GPIO_CLOCK_BIT,%d7 /* clock low */
+ move.l %d7,%a1@(0)
+ bset.l #GPIO_CLOCK_BIT,%d7 /* clock high */
+ move.l %d7,%a1@(0)
+ .endm
+
+ /* clock_out_bit reg: Clock out bit 31 of reg
+ *
+ * The data GPIO bit in %d7 is made equal to bit 31 of reg, written
+ * out, and followed by one clock_toggle. reg itself is not modified;
+ * callers shift it between invocations. Uses local labels 98 and 99.
+ */
+ /* XXX TODO: only write to GPIO if value changes */
+ /* XXX TODO: can probably optimize further using shifts & logical ops rather than branches */
+ .macro clock_out_bit reg:req
+ btst.l #31,\reg
+ beq 98f
+ bset.l #GPIO_DATA_BIT,%d7
+ trace #TR_CLKOBIT1
+ bra 99f
+98: bclr.l #GPIO_DATA_BIT,%d7
+ trace #TR_CLKOBIT0
+99: move.l %d7,%a1@(0)
+ clock_toggle
+ .endm
+
+ /* clock_out_zeros reg: Clock out zeros (GPIO set to 1), assume at least 1
+ *
+ * reg holds the number of clocks and is counted down to 0. Because the
+ * count is decremented before it is tested, a count of 0 on entry
+ * would not stop after zero clocks. Uses local label 99.
+ */
+ .macro clock_out_zeros reg:req
+ trace #TR_CLKZ
+ trace \reg
+ bset.l #GPIO_DATA_BIT,%d7
+ move.l %d7,%a1@(0)
+99: clock_toggle
+ subq.l #1,\reg
+ bne 99b
+ .endm
+
+ /* clock_in_bit reg, tmp, tmp2: Clocks one bit in and ORs it into bit 0
+ * of reg; the other bits of reg are left as they were
+ * note: bit 0 of reg must already be cleared
+ *
+ * The GPIO data register is read twice while the clock is low and the
+ * second read is the one kept. tmp and tmp2 are scratch registers and
+ * are clobbered; the value sampled is the raw GPIO level (callers
+ * invert it afterwards).
+ */
+ .macro clock_in_bit reg:req tmp:req tmp2:req
+ bclr.l #GPIO_CLOCK_BIT,%d7 /* clock low */
+ move.l %d7,%a1@(0)
+ move.l %a1@(0),\tmp /* dummy read */
+ move.l %a1@(0),\tmp /* actual read */
+ bset.l #GPIO_CLOCK_BIT,%d7 /* clock high */
+ move.l %d7,%a1@(0)
+ /* Shift the data GPIO bit down to bit 0 and isolate it */
+ moveq.l #GPIO_DATA_BIT,\tmp2
+ lsr.l \tmp2,\tmp
+ moveq.l #1,\tmp2
+ and.l \tmp2,\tmp
+ or.l \tmp,\reg
+ .if TRACE == 1
+ /* Trace code is TR_CLKIBIT0 or TR_CLKIBIT1 depending on the bit read */
+ move.l #TR_CLKIBIT0,\tmp2
+ or.l \tmp,\tmp2
+ trace \tmp2
+ .endif
+ .endm
+
+ /* Register usage
+ *
+ * A0 : SRAM base (BE)
+ * A1 : GPIO address. This is the data register, we assume the direction
+ * register is at this +4
+ * A2: CVIC address.
+ * A3: TRACEBUF
+ * A6: CMD/RESP pointer
+ * D7 : clock GPIO value (and data on Romulus)
+ * D6 : loop counter
+ * D5 : command register
+ * D4 : data value
+ *
+ * D0 and D1 are used as scratch by the code and macros below.
+ */
+
+ /* Get base addresses */
+ movea.l #SRAM_BASE_BE,%a0
+ movea.l #GPIO_BASE,%a1
+ add.l #GPIO_YZAAAB_DATA,%a1
+ movea.l #CVIC_BASE,%a2
+
+ /* Load GPIO value and Configure clock & data GPIO as output */
+ move.l %a1@(0),%d7
+ bset.l #GPIO_CLOCK_BIT,%d7
+ bset.l #GPIO_DATA_BIT,%d7
+ move.l %d7, %a1@(0)
+ move.l %a1@(4),%d0
+ bset.l #GPIO_CLOCK_BIT,%d0
+ bset.l #GPIO_DATA_BIT,%d0
+ move.l %d0,%a1@(4)
+
+ /* Cache GPIO value */
+ move.l %a1@(0),%d7
+
+ /* Clear interrupt count */
+ moveq.l #0,%d0
+ move.l %d0,%a0@(INT_CNT)
+
+ /* Install external interrupt vector */
+ lea _int,%a6
+ move.l %a6,(0x46*4)
+
+ /*
+ * Set the status register.
+ * NOTE(review): the original comment here said "Mask interrupts", but
+ * 0x2000 looks like supervisor mode with an interrupt priority mask
+ * of 0, i.e. interrupts enabled - confirm against the ColdFire SR
+ * layout.
+ */
+ move.w #0x2000,%sr
+
+ /* Configure GPIOs to output */
+ bsr config_gpio_out
+
+ /**** Main loop ****/
+
+ /*
+ * Status register values used around the command wait. The interrupt
+ * priority mask is SR bits 10-8 and the supervisor bit is bit 13, so
+ * 0x2700 masks interrupts while 0x2000 enables them. (The low bits
+ * are condition codes and do not mask anything.)
+ */
+ .equ SR_IRQ_MASKED, 0x2700
+ .equ SR_IRQ_ENABLED, 0x2000
+
+main_loop:
+ /* Initialize A6 to point to command area */
+ lea %a0@(CMD_DATA),%a6
+
+ /* Rewind the trace pointer to the start of the trace buffer */
+ lea %a0@(TRACEBUF),%a3
+
+ /*
+ * Wait for command
+ *
+ * Interrupts are masked before CMD_REG is sampled. "stop" loads the
+ * unmasked SR and sleeps as a single instruction, so a doorbell that
+ * is raised after the sample is still pending when "stop" executes
+ * and wakes it up, instead of being taken between the test and the
+ * "stop" and leaving us asleep with a command waiting.
+ */
+1: move.w #SR_IRQ_MASKED,%sr
+ move.l %a0@(CMD_REG),%d5
+ tst.b %d5
+ bne 2f
+ stop #SR_IRQ_ENABLED
+ bra 1b
+
+ /* Got a command; interrupts stay masked while it is processed */
+2:
+ /* Mark ourselves as sending a command */
+ move.b #STAT_SENDING,%a0@(STAT_REG)
+
+ /* Clear command register (only the command code byte) */
+ move.b #CMD_NONE,%a0@(CMD_REG + 3)
+
+ /* Start command ? */
+ cmpi.b #CMD_COMMAND,%d5
+ beq start_command
+
+ /* Break command ? */
+ cmpi.b #CMD_BREAK,%d5
+ beq start_break
+
+ /* Error: unknown command code */
+ move.b #STAT_ERR_INVAL_CMD,%a0@(STAT_REG)
+ bra main_loop
+
+ /*
+ * start_command: send the command held in CMD_DATA, then clock in the
+ * response (tag, optional data, CRC) and report it through the SRAM.
+ *
+ * In: %d5 = command register value, %a6 = CMD_DATA pointer
+ * Out: STAT_RTAG, RSP_DATA (only for a zero tag with RLEN != 0),
+ * STAT_RCRC, and STAT_REG = STAT_COMPLETE or STAT_ERR_MTOE
+ * Uses %d0, %d1, %d4, %d5, %d6, %d7. Ends by branching to main_loop.
+ *
+ * All data is inverted between the SRAM and the GPIO.
+ */
+start_command:
+ /* Start bit */
+ moveq.l #0,%d0
+ clock_out_bit %d0
+ trace #TR_CLKOSTART
+
+ /* Load first lword and invert it */
+ move.l %a6@(0),%d4
+ not.l %d4
+
+ /* Shift command right to get bit count at bottom */
+ lsr.l #8,%d5
+
+ trace #TR_OLEN
+ trace %d5
+
+ /* 32 bits or more ? If not go to tail
+ *
+ * An unsigned compare is used rather than a test of bit 5 of the
+ * length, so that a full 64-bit command (bit 6 set, bit 5 clear)
+ * also sends its first lword here and its second lword in the tail.
+ *
+ * Note: This assumes we have at least 1 bit to clock
+ */
+ cmpi.b #32,%d5
+ bcs 1f
+
+ /* Clock out 32 bits */
+ moveq #32,%d6
+ sub.l %d6,%d5
+0: clock_out_bit %d4
+ lsl.l #1,%d4
+ subq.l #1,%d6
+ bne 0b
+
+ /* Get remaining bits */
+ move.l %a6@(4),%d4
+ not.l %d4
+
+ /* Clock out what's left (low byte of D5 is the remaining bit count) */
+1: moveq.l #0,%d6
+ move.b %d5,%d6
+ beq 2f
+ trace #TR_OLEN
+ trace %d6
+0: clock_out_bit %d4
+ lsl.l #1,%d4
+ subq.l #1,%d6
+ bne 0b
+
+2: /* Done sending, ready to receive, first echo delay */
+ moveq #16,%d6
+ clock_out_zeros %d6
+
+ /* Set GPIO and transceivers to input */
+ bsr config_gpio_in
+
+ /* Wait for start bit, giving up after 1000 clocks */
+ move.l #1000,%d6
+ trace #TR_CLKWSTART
+0: moveq #0,%d4
+ clock_in_bit %d4,%d0,%d1
+ /* We read inverted value, so wait for a "0" */
+ btst #0,%d4
+ beq 1f
+ subq.l #1,%d6
+ bne 0b
+ move.b #STAT_ERR_MTOE,%a0@(STAT_REG)
+ bra send_delay
+
+1: /* Got start bit, clock in slave ID and response tag */
+ trace #TR_CLKTAG
+ moveq #4,%d6
+ moveq #0,%d4
+0: lsl.l #1,%d4
+ clock_in_bit %d4,%d0,%d1
+ subq.l #1,%d6
+ bne 0b
+
+ /* Invert data */
+ not.l %d4
+
+ /* (not strictly needed: clean up top bits) */
+ moveq #0xf,%d0
+ and.l %d0,%d4
+
+ /* Store into STAT_RTAG for host */
+ move.b %d4,%a0@(STAT_RTAG)
+
+ /* Extract tag part: the low 2 bits, the 2 bits above are the slave ID */
+ moveq #0x3,%d0
+ and.l %d0,%d4
+
+ /* If non-0, no data, go get CRC */
+ bne 1f
+
+ /* Do we expect data ? (D5 becomes the response length) */
+ lsr.l #8,%d5
+ beq 1f
+
+ /* Let's get data. Assume no more than 32-bits */
+ trace #TR_CLKDATA
+ trace %d5
+ move.l %d5,%d6
+ moveq.l #0,%d4
+0: lsl.l #1,%d4
+ clock_in_bit %d4,%d0,%d1
+ subq.l #1,%d6
+ bne 0b
+
+ /* Invert data and store it (bits never clocked in end up as 1) */
+ not.l %d4
+ move.l %d4,%a0@(RSP_DATA)
+
+1: /* Grab CRC */
+ trace #TR_CLKCRC
+ moveq.l #4,%d6
+ moveq.l #0,%d4
+0: lsl.l #1,%d4
+ clock_in_bit %d4,%d0,%d1
+ subq.l #1,%d6
+ bne 0b
+ trace %d4
+
+ /* Invert it, extract 4 bits, and store it */
+ not.l %d4
+ moveq.l #0xf,%d0
+ and.l %d0,%d4
+ move.b %d4,%a0@(STAT_RCRC)
+
+ /* Mark command complete */
+ move.b #STAT_COMPLETE,%a0@(STAT_REG)
+
+send_delay:
+ /* Configure GPIOs to output */
+ bsr config_gpio_out
+
+ /* Send delay after every command */
+ moveq #16,%d6
+ clock_out_zeros %d6
+ bra main_loop
+
+ /*
+ * start_break: CMD_BREAK handler. Nothing is clocked out here; the
+ * command is simply reported as complete.
+ */
+start_break:
+ move.b #STAT_COMPLETE,%a0@(STAT_REG)
+ bra main_loop
+
+ /*
+ * config_gpio_out: drive the data line from our side.
+ *
+ * In: %a1 = GPIO data register, %d7 = cached GPIO value
+ * Out: data bit set in %d7 and in the direction register at %a1+4,
+ * transceiver bit set in the QRST data register
+ * Clobbers %d0. Called with bsr, returns with rts.
+ */
+config_gpio_out:
+ /* Configure data GPIO as output, value 1 (idle) */
+ bset.l #GPIO_DATA_BIT,%d7
+ move.l %d7,%a1@(0)
+ move.l %a1@(4),%d0
+ bset.l #GPIO_DATA_BIT,%d0
+ move.l %d0,%a1@(4)
+
+ /* Set transceivers to output */
+ /* The QRST register is addressed relative to A1, hence the offset difference */
+ move.l %a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA),%d0
+ bset.l #GPIO_QRST_TRANS_BIT,%d0
+ move.l %d0,%a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA)
+ rts
+
+ /*
+ * config_gpio_in: release the data line so the response can be read.
+ *
+ * In: %a1 = GPIO data register
+ * Out: transceiver bit cleared in the QRST data register, then the
+ * data bit cleared in the direction register at %a1+4
+ * Clobbers %d0; %d7 is not changed. Called with bsr, returns with rts.
+ */
+config_gpio_in:
+ /* Set transceiver to input */
+ move.l %a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA),%d0
+ bclr.l #GPIO_QRST_TRANS_BIT,%d0
+ move.l %d0,%a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA)
+
+ /* Configure data GPIO as input */
+ move.l %a1@(4),%d0
+ bclr.l #GPIO_DATA_BIT,%d0
+ move.l %d0,%a1@(4)
+ rts
+
+ /*
+ * Interrupt handler (installed in vector 0x46 at startup)
+ *
+ * Counts the interrupt in INT_CNT and clears the software interrupt
+ * in the CVIC.
+ *
+ * In: %a0 = SRAM base, %a2 = CVIC base
+ *
+ * The interrupted code uses %d0 as a scratch register (for example
+ * for the GPIO direction read-modify-write), so %d0 is saved on the
+ * stack and restored before returning.
+ */
+_int:
+ move.l %d0,%sp@-
+ addq.l #1,%a0@(INT_CNT)
+ moveq.l #CVIC_SW_IRQ, %d0
+ move.l %d0,%a2@(CVIC_SW_IRQ_CLR)
+ move.l %sp@+,%d0
+ rte
+
+ /* Bad exception stubs
+ *
+ * 256 stubs, each aligned to 0x10 bytes. A stub stores its own index
+ * (its offset from _bad_exceptions divided by 0x10) in BAD_INT_VEC,
+ * sets the status to STAT_ERR_INVAL_IRQ and halts. %a0 must hold the
+ * SRAM base and %d0 is overwritten.
+ */
+ .org 0x10000
+_bad_exceptions:
+ .rept 256
+ .balign 0x10
+0: move.b #(0b - _bad_exceptions) / 0x10,%d0
+ move.b %d0,%a0@(BAD_INT_VEC)
+ move.b #STAT_ERR_INVAL_IRQ,%a0@(STAT_REG)
+ halt
+ .endr
diff --git a/cf-fsi-test.c b/cf-fsi-test.c
new file mode 100644
index 0000000..41d8b03
--- /dev/null
+++ b/cf-fsi-test.c
@@ -0,0 +1,664 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <getopt.h>
+#include <limits.h>
+#include <assert.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <time.h>
+
+/* Barrier issued before every register access below; the "memory" clobber
+ * also stops the compiler from moving memory accesses across it.
+ */
+#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+ : : "r" (0) : "memory")
+
+/* Read an 8-bit value from @addr after a barrier. */
+static inline uint8_t readb(void *addr)
+{
+	volatile uint8_t *reg = addr;
+
+	dsb();
+	return *reg;
+}
+
+/* Read a 16-bit value from @addr after a barrier. */
+static inline uint16_t readw(void *addr)
+{
+	volatile uint16_t *reg = addr;
+
+	dsb();
+	return *reg;
+}
+
+/* Read a 32-bit value from @addr after a barrier. */
+static inline uint32_t readl(void *addr)
+{
+	volatile uint32_t *reg = addr;
+
+	dsb();
+	return *reg;
+}
+
+/* Write the 8-bit @val to @addr after a barrier. */
+static inline void writeb(uint8_t val, void *addr)
+{
+	volatile uint8_t *reg = addr;
+
+	dsb();
+	*reg = val;
+}
+
+/* Write the 16-bit @val to @addr after a barrier. */
+static inline void writew(uint16_t val, void *addr)
+{
+	volatile uint16_t *reg = addr;
+
+	dsb();
+	*reg = val;
+}
+
+/* Write the 32-bit @val to @addr after a barrier. */
+static inline void writel(uint32_t val, void *addr)
+{
+	volatile uint32_t *reg = addr;
+
+	dsb();
+	*reg = val;
+}
+
+/* Write the 64-bit @val to @addr after a barrier. */
+static inline void writeq(uint64_t val, void *addr)
+{
+	volatile uint64_t *reg = addr;
+
+	dsb();
+	*reg = val;
+}
+
+/*
+ * Register offsets below are relative to the "sysreg" mapping, which starts
+ * at SYSREG_BASE; the trailing comments give the resulting physical address.
+ */
+#define SCU_REGS 0x000e2000 /* 1e6e2000 */
+/* Coprocessor control: reset and clock enable bits */
+#define SCU_COPRO_CTRL (SCU_REGS + 0x100)
+#define SCU_COPRO_RESET 0x00000002
+#define SCU_COPRO_CLK_EN 0x00000001
+/* Coprocessor address segments; bit 0 of each selects byte swapping */
+#define SCU_COPRO_SEG0 (SCU_REGS + 0x104) /* 1M */
+#define SCU_COPRO_SEG1 (SCU_REGS + 0x108) /* 1M */
+#define SCU_COPRO_SEG2 (SCU_REGS + 0x10c) /* 1M */
+#define SCU_COPRO_SEG3 (SCU_REGS + 0x110) /* 1M */
+#define SCU_COPRO_SEG4 (SCU_REGS + 0x114) /* 1M */
+#define SCU_COPRO_SEG5 (SCU_REGS + 0x118) /* 1M */
+#define SCU_COPRO_SEG6 (SCU_REGS + 0x11c) /* 1M */
+#define SCU_COPRO_SEG7 (SCU_REGS + 0x120) /* 1M */
+#define SCU_COPRO_SEG8 (SCU_REGS + 0x124) /* 8M */
+#define SCU_COPRO_SEG_SWAP 0x00000001
+/* Coprocessor cache control: global enable plus one enable bit per segment */
+#define SCU_COPRO_CACHE_CTL (SCU_REGS + 0x128)
+#define SCU_COPRO_CACHE_EN 0x00000001
+#define SCU_COPRO_SEG0_CACHE_EN 0x00000002
+#define SCU_COPRO_SEG1_CACHE_EN 0x00000004
+#define SCU_COPRO_SEG2_CACHE_EN 0x00000008
+#define SCU_COPRO_SEG3_CACHE_EN 0x00000010
+#define SCU_COPRO_SEG4_CACHE_EN 0x00000020
+#define SCU_COPRO_SEG5_CACHE_EN 0x00000040
+#define SCU_COPRO_SEG6_CACHE_EN 0x00000080
+#define SCU_COPRO_SEG7_CACHE_EN 0x00000100
+#define SCU_COPRO_SEG8_CACHE_EN 0x00000200
+
+/* Not referenced by the code in this file */
+#define COPRO_ICACHE_FLUSH_REG 0x00008000
+#define COPRO_DCACHE_FLUSH_REG 0x00008004
+
+/* SRAM used as the mailbox between this program and the firmware */
+#define SRAM_BASE 0x00120000 /* 1e720000 - actually 36K */
+#define SRAM_SIZE 0x00008000
+
+/* GPIO command source registers: select who drives a group of GPIOs */
+#define GPIO_REGS 0x00180000 /* 1e780000 */
+#define GPIO_YZAAAB_CMDSRC0 (GPIO_REGS + 0x170)
+#define GPIO_YZAAAB_CMDSRC1 (GPIO_REGS + 0x174)
+#define GPIO_QRST_CMDSRC0 (GPIO_REGS + 0x110)
+#define GPIO_QRST_CMDSRC1 (GPIO_REGS + 0x114)
+
+#define GPIO_AA_SRC_BIT 0x00010000
+#define GPIO_R_SRC_BIT 0x00000100
+
+/* Interrupt controller used to ring the firmware's doorbell */
+#define CVIC_BASE 0x000c2000
+#define CVIC_EN_REG 0x10
+#define CVIC_TRIG_REG 0x18
+
+/* Mapping of the system registers, set up by open_mem() */
+static void *sysreg;
+#define SYSREG_BASE 0x1e600000 /* System registers */
+#define SYSREG_SIZE 0x00200000 /* 2M*/
+
+/* Mapping of the memory holding the firmware image, set up by open_mem() */
+static void *cfmem;
+#define CFMEM_BASE 0x9ef00000 /* Reserved memory */
+#define CFMEM_SIZE 0x00100000 /* 1M */
+
+/*
+ * SRAM mailbox layout shared with the firmware. These values must stay in
+ * sync with the .equ definitions in cf-code.s.
+ */
+
+/* Command register: CMD in bits 7-0, CLEN in bits 15-8, RLEN in bits 23-16 */
+#define CMD_REG 0x00
+#define CMD_REG_CMD_MASK 0x000000ff
+#define CMD_REG_CMD_SHIFT 0
+#define CMD_NONE 0x00
+#define CMD_COMMAND 0x01
+#define CMD_BREAK 0x02
+#define CMD_INVALID 0xff
+#define CMD_REG_CLEN_MASK 0x0000ff00
+#define CMD_REG_CLEN_SHIFT 8
+#define CMD_REG_RLEN_MASK 0x00ff0000
+#define CMD_REG_RLEN_SHIFT 16
+
+/* Status byte written by the firmware */
+#define STAT_REG 0x04
+#define STAT_STOPPED 0x00
+#define STAT_SENDING 0x01
+#define STAT_COMPLETE 0x02
+#define STAT_ERR_INVAL_CMD 0x80
+#define STAT_ERR_INVAL_IRQ 0x81
+#define STAT_ERR_MTOE 0x82 /* value the firmware writes on a start bit timeout */
+
+#define STAT_RTAG 0x05
+#define STAT_RCRC 0x06
+
+#define CMD_DATA 0x10 /* 64 bit of data left aligned */
+#define RSP_DATA 0x20 /* 32 bit of data right aligned */
+#define INT_CNT 0x30 /* debug: interrupt count */
+#define BAD_INT_VEC 0x34 /* debug: vector of bad interrupt */
+
+/* Trace buffer and the event codes the firmware stores in it */
+#define TRACEBUF 0x40
+#define TR_CLKOSTART 0x00
+#define TR_OLEN 0x01 /* + len */
+#define TR_CLKOBIT0 0x02
+#define TR_CLKOBIT1 0x03
+#define TR_CLKZ 0x04 /* + # */
+#define TR_CLKWSTART 0x05
+#define TR_CLKTAG 0x06
+#define TR_CLKDATA 0x07 /* + len */
+#define TR_CLKCRC 0x08 /* + raw crc */
+#define TR_CLKIBIT0 0x80
+#define TR_CLKIBIT1 0x81
+/* Former spelling (with a digit zero), kept as an alias of the fixed name */
+#define TR_CLK0START TR_CLKOSTART
+
+/* FSI command opcodes */
+#define FSI_GPIO_CMD_DPOLL 0x2
+#define FSI_GPIO_CMD_EPOLL 0x3
+#define FSI_GPIO_CMD_TERM 0x3f
+#define FSI_GPIO_CMD_ABS_AR 0x4
+#define FSI_GPIO_CMD_REL_AR 0x5
+#define FSI_GPIO_CMD_SAME_AR 0x3 /* but only a 2-bit opcode... */
+
+/* Marker stored in last_addr when no previous address can be relied on */
+#define LAST_ADDR_INVALID 0x1
+
+/* Slave id (bits 22-21) and word address of the last successful access */
+uint32_t last_addr;
+
+/*
+ * Map the system registers (sysreg) and the firmware memory (cfmem) through
+ * /dev/mem. Exits the program with status 1 on any failure.
+ */
+static void open_mem(void)
+{
+	int fd;
+
+	fd = open("/dev/mem", O_RDWR | O_SYNC);
+	if (fd < 0) {
+		perror("can't open /dev/mem");
+		exit(1);
+	}
+
+	sysreg = mmap(0, SYSREG_SIZE, PROT_READ | PROT_WRITE,
+		      MAP_SHARED, fd, SYSREG_BASE);
+	if (sysreg == MAP_FAILED) {
+		perror("can't map system registers via /dev/mem");
+		exit(1);
+	}
+
+	cfmem = mmap(0, CFMEM_SIZE, PROT_READ | PROT_WRITE,
+		     MAP_SHARED, fd, CFMEM_BASE);
+	if (cfmem == MAP_FAILED) {
+		perror("can't map CF memory via /dev/mem");
+		exit(1);
+	}
+
+	/* The mappings stay valid after close, so don't leak the descriptor */
+	close(fd);
+}
+
+/* Program the coprocessor address segments and cache control. */
+static void setup_cf_maps(void)
+{
+	/* The two 1M halves of the system register window */
+	const uint32_t regs_lo = SYSREG_BASE;
+	const uint32_t regs_hi = SYSREG_BASE | 0x100000;
+
+	/*
+	 * Note about byteswap setting: the bus is wired backwards,
+	 * so setting the byteswap bit actually makes the ColdFire
+	 * work "normally" for a BE processor, ie, put the MSB in
+	 * the lowest address byte.
+	 *
+	 * The bit is therefore set for the main memory that holds the
+	 * program code, and the registers get two mappings, one with
+	 * each setting:
+	 *
+	 * Segments 2 and 3 have a "swapped" mapping (BE), and segments
+	 * 6 and 7 have a non-swapped mapping (LE), which avoids having
+	 * to byteswap register accesses since the registers are all LE.
+	 */
+
+	/* Segment 0: our memory region */
+	writel(CFMEM_BASE | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG0);
+
+	/* Segments 2 and 3: sysregs with byteswap (SRAM) */
+	writel(regs_lo | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG2);
+	writel(regs_hi | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG3);
+
+	/* Segments 6 and 7: our registers, not swapped */
+	writel(regs_lo, sysreg + SCU_COPRO_SEG6);
+	writel(regs_hi, sysreg + SCU_COPRO_SEG7);
+
+	/* Memory cachable, regs and SRAM not cachable */
+	writel(SCU_COPRO_SEG0_CACHE_EN | SCU_COPRO_CACHE_EN,
+	       sysreg + SCU_COPRO_CACHE_CTL);
+}
+
+/* Pulse the coprocessor reset bit, leaving the control register at 0. */
+static void reset_cf(void)
+{
+	void *ctrl = sysreg + SCU_COPRO_CTRL;
+
+	writel(SCU_COPRO_RESET, ctrl);
+	usleep(10);
+	writel(0, ctrl);
+}
+
+/* Start the coprocessor by writing only the clock enable bit. */
+static void start_cf(void)
+{
+	void *ctrl = sysreg + SCU_COPRO_CTRL;
+
+	writel(SCU_COPRO_CLK_EN, ctrl);
+}
+
+/*
+ * Copy the embedded firmware image (cf_code_start..cf_code_end) byte by
+ * byte to the start of the cfmem mapping. Exits with status 1 if the
+ * image does not fit in that mapping.
+ */
+static void load_cf_code(void)
+{
+	extern uint8_t cf_code_start, cf_code_end;
+
+	const uint8_t *code = &cf_code_start;
+	size_t len = &cf_code_end - &cf_code_start;
+	uint8_t *mem = cfmem;
+	size_t i;
+
+	/* Never write past the CFMEM_SIZE bytes that were mapped */
+	if (len > CFMEM_SIZE) {
+		fprintf(stderr, "ColdFire image too large (%lu bytes)\n",
+			(unsigned long)len);
+		exit(1);
+	}
+
+	for (i = 0; i < len; i++)
+		writeb(code[i], mem + i);
+}
+
+/* Read-modify-write one register: set @bit when @set is true, else clear it. */
+static void gpio_cmdsrc_update(uint32_t reg, uint32_t bit, bool set)
+{
+	uint32_t val = readl(sysreg + reg);
+
+	if (set)
+		val |= bit;
+	else
+		val &= ~bit;
+	writel(val, sysreg + reg);
+}
+
+/*
+ * Program the command source of the AA and R GPIO groups. The CMDSRC0 bit
+ * is always cleared; the CMDSRC1 bit is set for the ColdFire and cleared
+ * for the ARM.
+ */
+static void gpio_cmdsrc_select(bool coldfire)
+{
+	gpio_cmdsrc_update(GPIO_YZAAAB_CMDSRC0, GPIO_AA_SRC_BIT, false);
+	gpio_cmdsrc_update(GPIO_YZAAAB_CMDSRC1, GPIO_AA_SRC_BIT, coldfire);
+
+	gpio_cmdsrc_update(GPIO_QRST_CMDSRC0, GPIO_R_SRC_BIT, false);
+	gpio_cmdsrc_update(GPIO_QRST_CMDSRC1, GPIO_R_SRC_BIT, coldfire);
+}
+
+/* Hand the GPIOs back to the ARM (source = 00). */
+static void gpio_source_arm(void)
+{
+	gpio_cmdsrc_select(false);
+}
+
+/* Hand the GPIOs over to the ColdFire (source = 10). */
+static void gpio_source_cf(void)
+{
+	gpio_cmdsrc_select(true);
+}
+
+/* Next crc for each value of (current crc ^ incoming nibble) */
+static const uint8_t crc4_tab[] = {
+	0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2,
+	0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3,
+};
+
+/**
+ * crc4 - calculate the 4-bit crc of a value.
+ * @c: starting crc4 (only the low 4 bits are used)
+ * @x: value to checksum
+ * @bits: number of bits in @x to checksum
+ *
+ * Returns the crc4 value of @x, using polynomial 0b10111.
+ *
+ * The low @bits bits of @x are checksummed, most significant nibble first;
+ * bits above @bits are ignored. When @bits is not a multiple of 4 the value
+ * is padded with zero bits at the top. @bits <= 0 returns @c unchanged and
+ * @bits above 64 is treated as 64.
+ */
+static uint8_t crc4(uint8_t c, uint64_t x, int bits)
+{
+	int i;
+
+	if (bits <= 0)
+		return c;
+
+	/*
+	 * mask off anything above the top bit; shifting a 64-bit value by
+	 * 64 is undefined, and with 64 bits there is nothing to mask anyway
+	 */
+	if (bits < 64)
+		x &= (1ull << bits) - 1;
+	else
+		bits = 64;
+
+	/* Align to 4-bits */
+	bits = (bits + 3) & ~0x3;
+
+	/* Calculate crc4 over four-bit nibbles, starting at the MSbit */
+	for (i = bits - 4; i >= 0; i -= 4)
+		c = crc4_tab[(c ^ (x >> i)) & 0xf];
+
+	return c;
+}
+
+/* A message being assembled: bits are appended at the low end of msg */
+struct fsi_gpio_msg {
+ uint64_t msg; /* message bits, right aligned */
+ uint8_t bits; /* number of valid bits in msg */
+};
+
+/* Append the low @bits bits of @data to the end of @msg. */
+static void msg_push_bits(struct fsi_gpio_msg *msg, uint64_t data, int bits)
+{
+	const uint64_t keep = (1ull << bits) - 1;
+
+	msg->msg = (msg->msg << bits) | (data & keep);
+	msg->bits += bits;
+}
+
+/*
+ * Append the 4-bit crc of @msg to it. The crc covers an implicit leading
+ * start bit (1) followed by all the bits currently in the message.
+ */
+static void msg_push_crc(struct fsi_gpio_msg *msg)
+{
+	const int top = msg->bits & 0x3;	/* bits above the last nibble boundary */
+	const int aligned = msg->bits - top;	/* nibble-aligned remainder */
+	uint8_t crc;
+
+	/* start bit, and any non-aligned top bits */
+	crc = crc4(0, 1 << top | msg->msg >> aligned, top + 1);
+
+	/* aligned bits */
+	crc = crc4(crc, msg->msg, aligned);
+
+	msg_push_bits(msg, crc, 4);
+}
+
+/*
+ * Return true when @id and the word containing @addr match the last
+ * recorded access.
+ */
+static bool check_same_address(int id, uint32_t addr)
+{
+	/* Same encoding as last_address_update(): id in bits 22-21, word address below */
+	const uint32_t wanted = ((id & 0x3) << 21) | (addr & ~0x3);
+
+	/*
+	 * LAST_ADDR_INVALID has a low bit set, which "wanted" never has,
+	 * so an invalid last address can never match.
+	 */
+	return wanted == last_addr;
+}
+
+/*
+ * Check whether @addr can be reached from the last recorded address with a
+ * 9-bit signed offset on the same slave @id.
+ *
+ * Returns true and stores the offset in *@rel_addrp when it can; returns
+ * false (leaving *@rel_addrp untouched) when there is no valid last
+ * address, the slave id differs, or the offset is out of range.
+ */
+static bool check_relative_address(int id, uint32_t addr, uint32_t *rel_addrp)
+{
+	/*
+	 * Work on a copy of the global. The copy must not be called
+	 * last_addr: a local of that name initialised from "itself" would
+	 * read its own uninitialised value rather than the global.
+	 */
+	uint32_t prev = last_addr;
+	int32_t rel_addr;
+
+	if (prev == LAST_ADDR_INVALID)
+		return false;
+
+	/* We may be in 23-bit addressing mode, which uses the id as the
+	 * top two address bits. So, if we're referencing a different ID,
+	 * use absolute addresses.
+	 */
+	if (((prev >> 21) & 0x3) != id)
+		return false;
+
+	/* remove the top two bits from any 23-bit addressing */
+	prev &= (1 << 21) - 1;
+
+	/* We know that the addresses are limited to 21 bits, so this won't
+	 * overflow the signed rel_addr */
+	rel_addr = addr - prev;
+	if (rel_addr > 255 || rel_addr < -256)
+		return false;
+
+	*rel_addrp = (uint32_t)rel_addr;
+
+	return true;
+}
+
+/*
+ * Record the address of the access that just happened, or mark the last
+ * address as unusable when @valid is false.
+ */
+static void last_address_update(int id, bool valid, uint32_t addr)
+{
+	if (valid) {
+		/* id in bits 22-21, word-aligned address below */
+		last_addr = ((id & 0x3) << 21) | (addr & ~0x3);
+		return;
+	}
+
+	last_addr = LAST_ADDR_INVALID;
+}
+
+/*
+ * Build an address-relative (AR) read or write command in @cmd.
+ *
+ * @id:   slave id (2 bits are sent)
+ * @addr: target address, truncated to 21 bits
+ * @size: access size in bytes (1, 2 or 4)
+ * @data: bytes to write, or NULL for a read
+ *
+ * The shortest usable form is picked from the last recorded address: same
+ * address, relative address, or absolute address. The message ends with
+ * its crc and is left right-aligned in @cmd.
+ */
+static void build_ar_command(struct fsi_gpio_msg *cmd, uint8_t id,
+		uint32_t addr, size_t size, const void *data)
+{
+	const uint8_t *payload = data;
+	const bool is_write = data != NULL;
+	/* cmd opcodes are variable length - SAME_AR is only two bits */
+	int opcode_bits = 3;
+	int addr_bits;
+	uint32_t rel_addr;
+	uint8_t opcode;
+	int n;
+
+	cmd->msg = 0;
+	cmd->bits = 0;
+
+	/* we have 21 bits of address max */
+	addr &= ((1 << 21) - 1);
+
+	if (check_same_address(id, addr)) {
+		/* we still address the byte offset within the word */
+		opcode = FSI_GPIO_CMD_SAME_AR;
+		opcode_bits = 2;
+		addr_bits = 2;
+	} else if (check_relative_address(id, addr, &rel_addr)) {
+		/* 8 bits plus sign */
+		opcode = FSI_GPIO_CMD_REL_AR;
+		addr_bits = 9;
+		addr = rel_addr;
+	} else {
+		opcode = FSI_GPIO_CMD_ABS_AR;
+		addr_bits = 21;
+	}
+
+	/*
+	 * The read/write size is encoded in the lower bits of the address
+	 * (as it must be naturally-aligned), and the following ds bit.
+	 *
+	 * size addr:1 addr:0 ds
+	 * 1 x x 0
+	 * 2 x 0 1
+	 * 4 0 1 1
+	 *
+	 */
+	addr &= ~(size - 1);
+	if (size == 4)
+		addr |= 1;
+
+	msg_push_bits(cmd, id, 2);
+	msg_push_bits(cmd, opcode, opcode_bits);
+	msg_push_bits(cmd, is_write ? 0 : 1, 1);
+	msg_push_bits(cmd, addr, addr_bits);
+	msg_push_bits(cmd, size > 1 ? 1 : 0, 1);
+
+	/* Write data follows, one byte at a time in memory order */
+	if (is_write) {
+		for (n = 0; n < size; n++)
+			msg_push_bits(cmd, payload[n], 8);
+	}
+
+	msg_push_crc(cmd);
+}
+
+/*
+ * Verbose single 32-bit read of @addr on slave 0: prints the message, every
+ * status value polled, the raw response, the CRC verdict and the first 256
+ * bytes of the trace buffer. Polls without a timeout.
+ */
+void test_read(uint32_t addr)
+{
+	struct fsi_gpio_msg cmd;
+	uint32_t op, resp, crc;
+	uint8_t stat, rtag, rcrc;
+	int i;
+
+	build_ar_command(&cmd, 0, addr, 4, NULL);
+
+	/* Left align message */
+	cmd.msg <<= (64 - cmd.bits);
+
+	printf("msg: %d bits 0x%016llx\n", cmd.bits,
+	       (unsigned long long)cmd.msg);
+
+	/* Store message into SRAM, most significant word first */
+	writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA);
+	writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4);
+
+	op = CMD_COMMAND;
+	op |= cmd.bits << CMD_REG_CLEN_SHIFT;
+	op |= 32 << CMD_REG_RLEN_SHIFT;
+	writel(htonl(op), sysreg + SRAM_BASE + CMD_REG);
+
+	/* Ring doorbell */
+	writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG);
+
+	/* Wait until the status is neither "in progress" nor the 0xff fill */
+	do {
+		stat = readb(sysreg + SRAM_BASE + STAT_REG);
+		printf(" stat=%02x\n", stat);
+	} while(stat < STAT_COMPLETE || stat == 0xff);
+
+	resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA));
+	rtag = readb(sysreg + SRAM_BASE + STAT_RTAG);
+	rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC);
+	writeb(0, sysreg + SRAM_BASE + STAT_REG);
+
+	printf("CMD=%08x STAT=%02x RTAG=%02x, RCRC=%02x, RDATA=%08x\n",
+	       ntohl(readl(sysreg + SRAM_BASE + CMD_REG)),
+	       stat, rtag, rcrc, resp);
+
+	/*
+	 * we have a whole message now; check CRC
+	 *
+	 * The CRC covers the start bit, the 4 bits of slave id and tag, the
+	 * data (which the firmware only clocks in when the tag is 0) and the
+	 * CRC itself; running it over all of them gives 0 for a good message.
+	 */
+	crc = crc4(0, 1, 1);
+	crc = crc4(crc, rtag, 4);
+	if (!(rtag & 0x3))
+		crc = crc4(crc, resp, 32);
+	crc = crc4(crc, rcrc, 4);
+	if (crc)
+		printf("BAD CRC !!!\n");
+	else
+		printf("CRC ok !\n");
+
+	for (i = 0; i < 256; i++) {
+		printf("%02x ", readb(sysreg + SRAM_BASE + TRACEBUF + i));
+		if ((i % 16) == 15)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+/*
+ * Debug dump: command register, status, interrupt count, then the first
+ * 256 bytes of the trace buffer as 16 rows of hex.
+ */
+static void dump_stuff(void)
+{
+	void *sram = sysreg + SRAM_BASE;
+	int off;
+
+	printf("CMD:%08x STAT:%02x INT: %08x\n",
+	       ntohl(readl(sram + CMD_REG)),
+	       readb(sram + STAT_REG),
+	       ntohl(readl(sram + INT_CNT)));
+
+	for (off = 0; off < 256; off++) {
+		printf("%02x ", readb(sram + TRACEBUF + off));
+		/* end of a 16-byte row */
+		if ((off & 15) == 15)
+			printf("\n");
+	}
+}
+
+/*
+ * Quiet 32-bit read of @addr on slave 0.
+ *
+ * Returns 0 and stores the value in *@data on success. Returns -ETIMEDOUT
+ * when the firmware does not answer in time or the response CRC is bad,
+ * and -EIO when the firmware reports an error status or the slave answers
+ * with a non-zero tag. Any failure after the command was sent invalidates
+ * the last recorded address.
+ */
+int test_read_fast(uint32_t addr, uint32_t *data)
+{
+	struct fsi_gpio_msg cmd;
+	uint32_t op, resp, crc;
+	uint8_t stat, rtag, rcrc;
+	uint32_t timeout = 100000;
+
+	build_ar_command(&cmd, 0, addr, 4, NULL);
+
+	/* Left align message */
+	cmd.msg <<= (64 - cmd.bits);
+
+	/* Store message into SRAM */
+	writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA);
+	writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4);
+
+	op = CMD_COMMAND;
+	op |= cmd.bits << CMD_REG_CLEN_SHIFT;
+	op |= 32 << CMD_REG_RLEN_SHIFT;
+	writel(htonl(op), sysreg + SRAM_BASE + CMD_REG);
+
+	/* Ring doorbell */
+	writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG);
+
+	do {
+		if (timeout-- == 0) {
+			printf("Timeout !\n");
+
+			dump_stuff();
+			return -ETIMEDOUT;
+		}
+		stat = readb(sysreg + SRAM_BASE + STAT_REG);
+	} while(stat < STAT_COMPLETE || stat == 0xff);
+
+	resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA));
+	rtag = readb(sysreg + SRAM_BASE + STAT_RTAG);
+	rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC);
+
+	/* Clear status reg */
+	writeb(0, sysreg + SRAM_BASE + STAT_REG);
+
+	/*
+	 * Anything but STAT_COMPLETE is a firmware error code; the tag, data
+	 * and crc in SRAM are then not those of this command.
+	 */
+	if (stat != STAT_COMPLETE) {
+		printf("Firmware error 0x%02x\n", stat);
+		last_address_update(0, false, 0);
+		return -EIO;
+	}
+
+	/*
+	 * we have a whole message now; check CRC
+	 *
+	 * The CRC covers the start bit, the 4 bits of slave id and tag, the
+	 * data (which the firmware only clocks in when the tag is 0) and the
+	 * CRC itself; running it over all of them gives 0 for a good message.
+	 */
+	crc = crc4(0, 1, 1);
+	crc = crc4(crc, rtag, 4);
+	if (!(rtag & 3))
+		crc = crc4(crc, resp, 32);
+	crc = crc4(crc, rcrc, 4);
+	if (crc) {
+		last_address_update(0, false, 0);
+		printf("BAD CRC !!!\n");
+		return -ETIMEDOUT;
+	}
+	if (rtag & 3) {
+		printf("FSI error 0x%x\n", rtag & 3);
+		last_address_update(0, false, 0);
+		return -EIO;
+	}
+	last_address_update(0, true, addr);
+	*data = resp;
+	return 0;
+}
+
+/*
+ * Read address 0 up to 0x100000 / 4 times, checking that the value does not
+ * change, and print the elapsed wall time in milliseconds on stderr. The
+ * loop stops at the first failed read or mismatch.
+ */
+void bench(void)
+{
+	struct timespec t0, t1;
+	uint32_t val, orig;
+	uint64_t tns0, tns1;
+	int i, rc;
+
+	printf("Bench...\n");
+
+	/* Reference value; give up right away if even this read fails */
+	rc = test_read_fast(0, &orig);
+	if (rc)
+		return;
+	clock_gettime(CLOCK_MONOTONIC, &t0);
+	for (i = 0; i < (0x100000 / 4); i++) {
+		rc = test_read_fast(0, &val);
+		if (rc) {
+			printf("Failed after %d iterations\n", i);
+			break;
+		}
+		if (val != orig) {
+			printf("mismatch ! %08x vs. %08x\n", val, orig);
+			break;
+		}
+	}
+	printf("\n");
+	clock_gettime(CLOCK_MONOTONIC, &t1);
+	tns0 = t0.tv_sec * 1000000000ull + t0.tv_nsec;
+	tns1 = t1.tv_sec * 1000000000ull + t1.tv_nsec;
+	/* The difference is unsigned: print it as such, with a matching type */
+	fprintf(stderr, "Spent: %llu ms\n",
+		(unsigned long long)((tns1 - tns0) / 1000000));
+}
+
+
+/*
+ * Load and start the firmware, run two verbose reads and the benchmark,
+ * then give the GPIOs back to the ARM.
+ */
+int main(int argc, char *argv[])
+{
+	void *sram;
+
+	open_mem();
+	sram = sysreg + SRAM_BASE;
+
+	printf("Resetting ColdFire...\n");
+	reset_cf();
+
+	printf("Setting up and starting ColdFire...\n");
+	setup_cf_maps();
+	load_cf_code();
+	gpio_source_cf();
+
+	/* Mark command invalid and clear sram */
+	memset(sram, 0xff, 0x1000);
+
+	/* Start ColdFire */
+	start_cf();
+
+	/* Wait for ack: the firmware changes the 0xff fill in the command register */
+	while (readl(sram + CMD_REG) == 0xffffffff)
+		;
+
+	printf("CMD:%08x STAT:%02x INT: %08x\n",
+	       ntohl(readl(sram + CMD_REG)),
+	       readb(sram + STAT_REG),
+	       ntohl(readl(sram + INT_CNT)));
+
+	/* Enable interrupt */
+	writel(0x2, sysreg + CVIC_BASE + CVIC_EN_REG);
+
+	/* No previous access yet: force an absolute address on the first read */
+	last_address_update(0, false, 0);
+
+	/* Test read */
+	test_read(0);
+	test_read(4);
+
+	bench();
+
+	gpio_source_arm();
+
+	return 0;
+}
+
+
diff --git a/cf-wrapper.S b/cf-wrapper.S
new file mode 100644
index 0000000..9b8f5cf
--- /dev/null
+++ b/cf-wrapper.S
@@ -0,0 +1,9 @@
+ /*
+ * Embed the raw firmware image in the data section, bracketed by the
+ * cf_code_start / cf_code_end symbols.
+ *
+ * NOTE(review): cf_code_end is placed after the second .align, so it
+ * may lie past the last byte of cf-code.bin, with alignment padding in
+ * between - confirm what ".align 8" means for the target assembler.
+ */
+ .data
+ .align 8
+ .globl cf_code_start
+cf_code_start:
+ .incbin "cf-code.bin"
+ .align 8
+ .globl cf_code_end
+cf_code_end:
+ .previous
OpenPOWER on IntegriCloud