diff options
-rw-r--r-- | Makefile | 27 | ||||
-rw-r--r-- | cf-code.s | 438 | ||||
-rw-r--r-- | cf-fsi-test.c | 664 | ||||
-rw-r--r-- | cf-wrapper.S | 9 |
4 files changed, 1138 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a75f0a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +M68KCROSS ?= /opt/cross/binutils-coldfire/bin/m68k-unknown-elf- +M68KAS=$(M68KCROSS)as +M68KLD=$(M68KCROSS)ld +M68KOC=$(M68KCROSS)objcopy + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -Os + +all: cf-fsi-test + +cf-code.o : cf-code.s + $(M68KAS) -march=isac $^ -o $@ + +cf-code.elf : cf-code.o + $(M68KLD) -Ttext 0 $^ -o $@ + +cf-code.bin : cf-code.elf + $(M68KOC) -O binary $^ $@ + +cf-wrapper.o : cf-wrapper.S cf-code.bin + $(CC) $(CFLAGS) -c cf-wrapper.S -o $@ + +cf-fsi-test : cf-fsi-test.o cf-wrapper.o + $(CC) $(CFLAGS) $^ -o $@ + +clean: + rm -rf cf-fsi-test *.o *.elf *.bin diff --git a/cf-code.s b/cf-code.s new file mode 100644 index 0000000..725ddd3 --- /dev/null +++ b/cf-code.s @@ -0,0 +1,438 @@ + .text + + .equ TRACE, 0 /* 1 makes the trace macro store event bytes through %a3 */ +_vecs: + /* Vectors */ + .org 0 + + /* Boot vector */ + .long 0x0ffff0 /* Stack below 1M */ + .long 0x400 /* Start code */ + + /* + * Remaining 254 vectors point to corresponding stubs + * starting at 0x10000, 0x10 bytes each: the entry at byte + * offset N of this table holds 0x10000 + N * 4 + */ + .rept 254 +0: .long 0x10000 + (0b - _vecs) * 4 + .endr + + /* Main entry */ + .org 0x400 + .global _start +_start: + .equ SRAM_BASE_BE, 0x320000 + .equ SRAM_BASE_LE, 0x720000 + .equ GPIO_BASE, 0x780000 + .equ CVIC_BASE, 0x6c2000 + + .equ CVIC_STATUS, 0x00 + .equ CVIC_SW_IRQ_CLR, 0x1c + .equ CVIC_SW_IRQ, 0x2 + + /* XXX Romulus specific definitions */ + .equ GPIO_YZAAAB_DATA, 0x1e0 + .equ GPIO_YZAAAB_DIR, 0x1e4 + .equ GPIO_CLOCK_BIT, 16 + .equ GPIO_DATA_BIT, 18 + .equ GPIO_QRST_DATA, 0x80 + .equ GPIO_QRST_TRANS_BIT, 10 + + /**** SRAM layout ****/ + + /* Command register: + * + * +---------------------------+ + * | rsvd | RLEN | CLEN | CMD | + * | 8 | 8 | 8 | 8 | + * +---------------------------+ + * | | | + * Response len | | + * (in bits) | | + * | | + * Command len | + * (in bits) | + * | + * Command code + * + * The main loop treats a non-zero CMD byte as a pending command + */ + .equ CMD_REG, 0x00 + .equ CMD_NONE, 0x00 + .equ CMD_COMMAND, 0x01 + .equ CMD_BREAK, 
0x02 + + /* Status register + * + * Written by this firmware; the STAT_ERR_* codes all have bit 7 set + */ + .equ STAT_REG, 0x04 /* Status */ + .equ STAT_STOPPED, 0x00 + .equ STAT_SENDING, 0x01 + .equ STAT_COMPLETE, 0x02 + .equ STAT_ERR_INVAL_CMD, 0x80 + .equ STAT_ERR_INVAL_IRQ, 0x81 + .equ STAT_ERR_MTOE, 0x82 + + /* Response tag */ + .equ STAT_RTAG, 0x05 + + /* Response CRC */ + .equ STAT_RCRC, 0x06 + + /* Command data area + * + * Last byte of message must be left aligned + */ + .equ CMD_DATA, 0x10 /* 64 bit of data */ + + /* Response data area, right aligned, unused top bits are 1 */ + .equ RSP_DATA, 0x20 /* 32 bit of data */ + + /* Misc */ + .equ INT_CNT, 0x30 /* 32-bit interrupt count */ + .equ BAD_INT_VEC, 0x34 + .equ TRACEBUF, 0x40 + .equ TR_CLKOSTART, 0x00 + .equ TR_OLEN, 0x01/* + len */ + .equ TR_CLKOBIT0, 0x02 + .equ TR_CLKOBIT1, 0x03 + .equ TR_CLKZ, 0x04 /* + count */ + .equ TR_CLKWSTART, 0x05 + .equ TR_CLKTAG, 0x06 + .equ TR_CLKDATA, 0x07 /* + len */ + .equ TR_CLKCRC, 0x08 /* + raw crc */ + .equ TR_CLKIBIT0, 0x80 + .equ TR_CLKIBIT1, 0x81 + + /* Useful macros + * + * trace op: with TRACE == 1, store byte op at %a3 and post-increment %a3; + * otherwise it expands to nothing + */ + + .if TRACE == 1 + .macro trace op:req + move.b \op,%a3@+ + .endm + .else + .macro trace op:req + .endm + .endif + + /* clock_toggle: toggle the clock down and back up */ + .macro clock_toggle + bclr.l #GPIO_CLOCK_BIT,%d7 /* clock low */ + move.l %d7,%a1@(0) + bset.l #GPIO_CLOCK_BIT,%d7 /* clock high */ + move.l %d7,%a1@(0) + .endm + + /* clock_out_bit reg: Clock out bit 31 of reg (bit set drives the data GPIO high) */ + /* XXX TODO: only write to GPIO if value changes */ + /* XXX TODO: can probably optimize further using shifts & logical ops rather than branches */ + .macro clock_out_bit reg:req + btst.l #31,\reg + beq 98f + bset.l #GPIO_DATA_BIT,%d7 + trace #TR_CLKOBIT1 + bra 99f +98: bclr.l #GPIO_DATA_BIT,%d7 + trace #TR_CLKOBIT0 +99: move.l %d7,%a1@(0) + clock_toggle + .endm + + /* clock_out_zeros reg: Clock out reg zeros (GPIO set to 1), assume at least 1; reg is counted down to 0 */ + .macro clock_out_zeros reg:req + trace #TR_CLKZ + trace \reg + bset.l #GPIO_DATA_BIT,%d7 + move.l %d7,%a1@(0) +99: clock_toggle + subq.l 
#1,\reg
+	bne 99b
+	.endm
+
+	/* clock_in_bit reg: Clocks in bit into bit 0 of reg, the rest is 0
+	 * note: bit 0 of reg must already be cleared
+	 *
+	 * tmp and tmp2 are scratch registers and are clobbered. The sampled
+	 * GPIO level is ORed into reg as-is (callers invert afterwards).
+	 */
+	.macro clock_in_bit reg:req tmp:req tmp2:req
+	bclr.l #GPIO_CLOCK_BIT,%d7	/* clock low */
+	move.l %d7,%a1@(0)
+	move.l %a1@(0),\tmp		/* dummy read */
+	move.l %a1@(0),\tmp		/* actual read */
+	bset.l #GPIO_CLOCK_BIT,%d7	/* clock high */
+	move.l %d7,%a1@(0)
+	moveq.l #GPIO_DATA_BIT,\tmp2
+	lsr.l \tmp2,\tmp
+	moveq.l #1,\tmp2
+	and.l \tmp2,\tmp
+	or.l \tmp,\reg
+	.if TRACE == 1
+	move.l #TR_CLKIBIT0,\tmp2
+	or.l \tmp,\tmp2
+	trace \tmp2
+	.endif
+	.endm
+
+	/* Register usage
+	 *
+	 * A0 : SRAM base (BE)
+	 * A1 : GPIO address. This is the data register, we assume the direction
+	 *      register is at this +4
+	 * A2: CVIC address.
+	 * A3: TRACEBUF
+	 * A6: CMD/RESP pointer
+	 * D7 : clock GPIO value (and data on Romulus)
+	 * D6 : loop counter
+	 * D5 : command register
+	 * D4 : data value
+	 */
+
+	/* Get base addresses */
+	movea.l #SRAM_BASE_BE,%a0
+	movea.l #GPIO_BASE,%a1
+	add.l #GPIO_YZAAAB_DATA,%a1
+	movea.l #CVIC_BASE,%a2
+
+	/* Load GPIO value and Configure clock & data GPIO as output */
+	move.l %a1@(0),%d7
+	bset.l #GPIO_CLOCK_BIT,%d7
+	bset.l #GPIO_DATA_BIT,%d7
+	move.l %d7, %a1@(0)
+	move.l %a1@(4),%d0
+	bset.l #GPIO_CLOCK_BIT,%d0
+	bset.l #GPIO_DATA_BIT,%d0
+	move.l %d0,%a1@(4)
+
+	/* Cache GPIO value */
+	move.l %a1@(0),%d7
+
+	/* Clear interrupt count */
+	moveq.l #0,%d0
+	move.l %d0,%a0@(INT_CNT)
+
+	/* Install external interrupt vector */
+	lea _int,%a6
+	move.l %a6,(0x46*4)
+
+	/* Unmask interrupts: supervisor mode, interrupt priority mask = 0 */
+	move.w #0x2000,%sr
+
+	/* Configure GPIOs to output */
+	bsr config_gpio_out
+
+	/**** Main loop ****/
+main_loop:
+	/* Initialize A6 to point to command area */
+	lea %a0@(CMD_DATA),%a6
+
+	/* Restart the trace buffer for every command */
+	lea %a0@(TRACEBUF),%a3
+
+	/* Wait for command: sleep with interrupts enabled until the low
+	 * byte of the command register is non-zero
+	 */
+1:	move.l %a0@(CMD_REG),%d5
+	tst.b %d5
+	bne 1f
+	stop #0x2000
+	bra 1b
+
+	/* Mask interrupts while a command is processed.
+	 *
+	 * The interrupt priority mask is SR bits 10:8, so the value is
+	 * 0x2700. The previous 0x2007 only set condition code bits and
+	 * left interrupts enabled, and the handler uses %d0 without
+	 * saving it.
+	 */
+1:	move.w #0x2700,%sr
+
+	/* Mark ourselves as sending a command */
+	move.b 
#STAT_SENDING,%a0@(STAT_REG) + + /* Clear command register (CMD byte only) */ + move.b #CMD_NONE,%a0@(CMD_REG + 3) + + /* Start command ? */ + cmpi.b #CMD_COMMAND,%d5 + beq start_command + + /* Break command ? */ + cmpi.b #CMD_BREAK,%d5 + beq start_break + + /* Error: unknown command code */ + move.b #STAT_ERR_INVAL_CMD,%a0@(STAT_REG) + bra main_loop + +start_command: + /* Start bit */ + moveq.l #0,%d0 + clock_out_bit %d0 + trace #TR_CLKOSTART + + /* Load first lword and invert it */ + move.l %a6@(0),%d4 + not.l %d4 + + /* Shift command right to get bit count at bottom */ + lsr.l #8,%d5 + + trace #TR_OLEN + trace %d5 + + /* More than 32 ? If not go to tail + * + * Note: This assumes we have at least 1 bit to clock + * + * NOTE(review): only bit 5 of the length is tested, so a length of + * exactly 32 also takes the 32-bit path and a length of 64 does not; + * confirm the host never sends 64 bits. + */ + btst.b #5,%d5 + beq 1f + + /* Clock out 32 bits */ + moveq #32,%d6 + sub.l %d6,%d5 +0: clock_out_bit %d4 + lsl.l #1,%d4 + subq.l #1,%d6 + bne 0b + + /* Get remaining bits */ + move.l %a6@(4),%d4 + not.l %d4 + + /* Clock out what's left (low byte of D5 is the remaining bit count) */ +1: moveq.l #0,%d6 + move.b %d5,%d6 + beq 2f + trace #TR_OLEN + trace %d6 +0: clock_out_bit %d4 + lsl.l #1,%d4 + subq.l #1,%d6 + bne 0b + +2: /* Done sending, ready to receive, first echo delay */ + moveq #16,%d6 + clock_out_zeros %d6 + + /* Set GPIO and transceivers to input */ + bsr config_gpio_in + + /* Wait for start bit, giving up after 1000 clocks */ + move.l #1000,%d6 + trace #TR_CLKWSTART +0: moveq #0,%d4 + clock_in_bit %d4,%d0,%d1 + /* We read inverted value, so wait for a "0" */ + btst #0,%d4 + beq 1f + subq.l #1,%d6 + bne 0b + move.b #STAT_ERR_MTOE,%a0@(STAT_REG) + bra send_delay + +1: /* Got start bit, clock in slave ID and response tag */ + trace #TR_CLKTAG + moveq #4,%d6 + moveq #0,%d4 +0: lsl.l #1,%d4 + clock_in_bit %d4,%d0,%d1 + subq.l #1,%d6 + bne 0b + + /* Invert data */ + not.l %d4 + + /* (not strictly needed: clean up top bits) */ + moveq #0xf,%d0 + and.l %d0,%d4 + + /* Store into STAT_RTAG for host */ + move.b %d4,%a0@(STAT_RTAG) + + /* Extract tag part + * + * NOTE(review): the mask 0x7 keeps three of the four bits clocked in + * above, while the host side tests rtag & 3; confirm which is intended. + */ + moveq #0x7,%d0 + and.l %d0,%d4 + + /* If non-0, no data, go get CRC */ + bne 1f + + /* Do we expect 
data ? */ + lsr.l #8,%d5 + beq 1f + + /* Let's get data. Assume no more than 32-bits */ + trace #TR_CLKDATA + trace %d5 + move.l %d5,%d6 + moveq.l #0,%d4 +0: lsl.l #1,%d4 + clock_in_bit %d4,%d0,%d1 + subq.l #1,%d6 + bne 0b + + /* Invert data and store it */ + not.l %d4 + move.l %d4,%a0@(RSP_DATA) + +1: /* Grab CRC (4 bits) */ + trace #TR_CLKCRC + moveq.l #4,%d6 + moveq.l #0,%d4 +0: lsl.l #1,%d4 + clock_in_bit %d4,%d0,%d1 + subq.l #1,%d6 + bne 0b + trace %d4 + + /* Invert it, extract 4 bits, and store it */ + not.l %d4 + moveq.l #0xf,%d0 + and.l %d0,%d4 + move.b %d4,%a0@(STAT_RCRC) + + /* Mark command complete */ + move.b #STAT_COMPLETE,%a0@(STAT_REG) + +send_delay: + /* Configure GPIOs to output */ + bsr config_gpio_out + + /* Send delay after every command (16 clocks) */ + moveq #16,%d6 + clock_out_zeros %d6 + bra main_loop + +start_break: /* Break command: nothing is clocked out here, completion is reported immediately */ + move.b #STAT_COMPLETE,%a0@(STAT_REG) + bra main_loop + +config_gpio_out: + /* Configure data GPIO as output, value 1 (idle); clobbers D0 */ + bset.l #GPIO_DATA_BIT,%d7 + move.l %d7,%a1@(0) + move.l %a1@(4),%d0 + bset.l #GPIO_DATA_BIT,%d0 + move.l %d0,%a1@(4) + + /* Set transceivers to output */ + move.l %a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA),%d0 + bset.l #GPIO_QRST_TRANS_BIT,%d0 + move.l %d0,%a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA) + rts + +config_gpio_in: + /* Set transceiver to input; clobbers D0 */ + move.l %a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA),%d0 + bclr.l #GPIO_QRST_TRANS_BIT,%d0 + move.l %d0,%a1@(GPIO_QRST_DATA-GPIO_YZAAAB_DATA) + + /* Configure data GPIO as input */ + move.l %a1@(4),%d0 + bclr.l #GPIO_DATA_BIT,%d0 + move.l %d0,%a1@(4) + rts + + /* Interrupt handler: bump INT_CNT and clear the CVIC software IRQ. + * Note: %d0 is overwritten without being saved. + */ +_int: + addq.l #1,%a0@(INT_CNT) + moveq.l #CVIC_SW_IRQ, %d0 + move.l %d0,%a2@(CVIC_SW_IRQ_CLR) + rte + + /* Bad exception stubs: one 0x10-aligned stub per vector; each stores + * its index in BAD_INT_VEC, sets STAT_ERR_INVAL_IRQ and halts + */ + .org 0x10000 +_bad_exceptions: + .rept 256 + .balign 0x10 +0: move.b #(0b - _bad_exceptions) / 0x10,%d0 + move.b %d0,%a0@(BAD_INT_VEC) + move.b #STAT_ERR_INVAL_IRQ,%a0@(STAT_REG) + halt + .endr diff --git a/cf-fsi-test.c b/cf-fsi-test.c new file mode 100644 index 
0000000..41d8b03 --- /dev/null +++ b/cf-fsi-test.c @@ -0,0 +1,664 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <byteswap.h> +#include <stdint.h> +#include <stdbool.h> +#include <getopt.h> +#include <limits.h> +#include <assert.h> +#include <arpa/inet.h> +#include <errno.h> +#include <time.h> + +/* ARM-only: CP15 c7, c10, 4 write with a "memory" clobber, issued before every MMIO access below */ #define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ + : : "r" (0) : "memory") + +/* MMIO accessors: each one issues dsb() and then performs a single volatile access of the named width at addr */ static inline uint8_t readb(void *addr) +{ + dsb(); + return *(volatile uint8_t *)addr; +} + +static inline uint16_t readw(void *addr) +{ + dsb(); + return *(volatile uint16_t *)addr; +} + +static inline uint32_t readl(void *addr) +{ + dsb(); + return *(volatile uint32_t *)addr; +} + +static inline void writeb(uint8_t val, void *addr) +{ + dsb(); + *(volatile uint8_t *)addr = val; +} + +static inline void writew(uint16_t val, void *addr) +{ + dsb(); + *(volatile uint16_t *)addr = val; +} + +static inline void writel(uint32_t val, void *addr) +{ + dsb(); + *(volatile uint32_t *)addr = val; +} + +static inline void writeq(uint64_t val, void *addr) +{ + dsb(); + *(volatile uint64_t *)addr = val; +} + +/* Register offsets below are relative to the sysreg mapping; the trailing comment gives the physical address */ #define SCU_REGS 0x000e2000 /* 1e6e2000 */ +#define SCU_COPRO_CTRL (SCU_REGS + 0x100) +#define SCU_COPRO_RESET 0x00000002 +#define SCU_COPRO_CLK_EN 0x00000001 +#define SCU_COPRO_SEG0 (SCU_REGS + 0x104) /* 1M */ +#define SCU_COPRO_SEG1 (SCU_REGS + 0x108) /* 1M */ +#define SCU_COPRO_SEG2 (SCU_REGS + 0x10c) /* 1M */ +#define SCU_COPRO_SEG3 (SCU_REGS + 0x110) /* 1M */ +#define SCU_COPRO_SEG4 (SCU_REGS + 0x114) /* 1M */ +#define SCU_COPRO_SEG5 (SCU_REGS + 0x118) /* 1M */ +#define SCU_COPRO_SEG6 (SCU_REGS + 0x11c) /* 1M */ +#define SCU_COPRO_SEG7 (SCU_REGS + 0x120) /* 1M */ +#define SCU_COPRO_SEG8 (SCU_REGS + 0x124) /* 8M */ +#define SCU_COPRO_SEG_SWAP 0x00000001 +#define SCU_COPRO_CACHE_CTL (SCU_REGS + 0x128) +#define SCU_COPRO_CACHE_EN 0x00000001 
+#define SCU_COPRO_SEG0_CACHE_EN 0x00000002 +#define SCU_COPRO_SEG1_CACHE_EN 0x00000004 +#define SCU_COPRO_SEG2_CACHE_EN 0x00000008 +#define SCU_COPRO_SEG3_CACHE_EN 0x00000010 +#define SCU_COPRO_SEG4_CACHE_EN 0x00000020 +#define SCU_COPRO_SEG5_CACHE_EN 0x00000040 +#define SCU_COPRO_SEG6_CACHE_EN 0x00000080 +#define SCU_COPRO_SEG7_CACHE_EN 0x00000100 +#define SCU_COPRO_SEG8_CACHE_EN 0x00000200 + +#define COPRO_ICACHE_FLUSH_REG 0x00008000 +#define COPRO_DCACHE_FLUSH_REG 0x00008004 + +#define SRAM_BASE 0x00120000 /* 1e720000 - actually 36K */ +#define SRAM_SIZE 0x00008000 + +#define GPIO_REGS 0x00180000 /* 1e780000 */ +#define GPIO_YZAAAB_CMDSRC0 (GPIO_REGS + 0x170) +#define GPIO_YZAAAB_CMDSRC1 (GPIO_REGS + 0x174) +#define GPIO_QRST_CMDSRC0 (GPIO_REGS + 0x110) +#define GPIO_QRST_CMDSRC1 (GPIO_REGS + 0x114) + +#define GPIO_AA_SRC_BIT 0x00010000 +#define GPIO_R_SRC_BIT 0x00000100 + +#define CVIC_BASE 0x000c2000 +#define CVIC_EN_REG 0x10 +#define CVIC_TRIG_REG 0x18 + +static void *sysreg; +#define SYSREG_BASE 0x1e600000 /* System registers */ +#define SYSREG_SIZE 0x00200000 /* 2M*/ + +static void *cfmem; +#define CFMEM_BASE 0x9ef00000 /* Reserved memory */ +#define CFMEM_SIZE 0x00100000 /* 1M */ + +#define CMD_REG 0x00 +#define CMD_REG_CMD_MASK 0x000000ff +#define CMD_REG_CMD_SHIFT 0 +#define CMD_NONE 0x00 +#define CMD_COMMAND 0x01 +#define CMD_BREAK 0x02 +#define CMD_INVALID 0xff +#define CMD_REG_CLEN_MASK 0x0000ff00 +#define CMD_REG_CLEN_SHIFT 8 +#define CMD_REG_RLEN_MASK 0x00ff0000 +#define CMD_REG_RLEN_SHIFT 16 + +#define STAT_REG 0x04 +#define STAT_STOPPED 0x00 +#define STAT_SENDING 0x01 +#define STAT_COMPLETE 0x02 +#define STAT_ERR_INVAL_CMD 0x80 +#define STAT_ERR_INVAL_IRQ 0x81 +#define STAT_ERR_MTOE 0x83 + +#define STAT_RTAG 0x05 +#define STAT_RCRC 0x06 + +#define CMD_DATA 0x10 /* 64 bit of data left aligned */ +#define RSP_DATA 0x20 /* 32 bit of data right aligned */ +#define INT_CNT 0x30 /* debug: interrupt count */ +#define BAD_INT_VEC 0x34 /* debug: vector of 
bad interrupt */ + +#define TRACEBUF 0x40 +#define TR_CLK0START 0x01 +#define TR_CLKOBIT0 0x02 +#define TR_CLKOBIT1 0x03 +#define TR_CLKZ 0x04 /* + # */ +#define TR_CLKWSTART 0x05 /* + bit */ + +#define FSI_GPIO_CMD_DPOLL 0x2 +#define FSI_GPIO_CMD_EPOLL 0x3 +#define FSI_GPIO_CMD_TERM 0x3f +#define FSI_GPIO_CMD_ABS_AR 0x4 +#define FSI_GPIO_CMD_REL_AR 0x5 +#define FSI_GPIO_CMD_SAME_AR 0x3 /* but only a 2-bit opcode... */ + +#define LAST_ADDR_INVALID 0x1 + +uint32_t last_addr; + +static void open_mem(void) +{ + int fd; + + fd = open("/dev/mem", O_RDWR | O_SYNC); + if (fd < 0) { + perror("can't open /dev/mem"); + exit(1); + } + + sysreg = mmap(0, SYSREG_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, SYSREG_BASE); + if (sysreg == MAP_FAILED) { + perror("can't map system registers via /dev/mem"); + exit(1); + } + + cfmem = mmap(0, CFMEM_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, CFMEM_BASE); + if (cfmem == MAP_FAILED) { + perror("can't map CF memory via /dev/mem"); + exit(1); + } +} + +static void setup_cf_maps(void) +{ + /* + * Note about byteswap setting: the bus is wired backwards, + * so setting the byteswap bit actually makes the ColdFire + * work "normally" for a BE processor, ie, put the MSB in + * the lowest address byte. + * + * We thus need to set the bit for our main memory which + * contains our program code. We create two mappings for + * the register, one with each setting. + * + * Segments 2 and 3 has a "swapped" mapping (BE) + * and 6 and 7 have a non-swapped mapping (LE) which allows + * us to avoid byteswapping register accesses since the + * registers are all LE. 
+ */ + + /* Setup segment 0 to our memory region */ + writel(CFMEM_BASE | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG0); + + /* Segments 2 and 3 to sysregs with byteswap (SRAM) */ + writel(SYSREG_BASE | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG2); + writel(SYSREG_BASE | 0x100000 | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG3); + + /* And segment 6 and 7 to our registers (no byteswap) */ + writel(SYSREG_BASE, sysreg + SCU_COPRO_SEG6); + writel(SYSREG_BASE | 0x100000, sysreg + SCU_COPRO_SEG7); + + /* Memory cacheable, regs and SRAM not cacheable: only segment 0 gets its cache enable bit */ + writel(SCU_COPRO_SEG0_CACHE_EN | SCU_COPRO_CACHE_EN, + sysreg + SCU_COPRO_CACHE_CTL); +} + +/* Write SCU_COPRO_RESET to the coprocessor control register, wait 10us, then clear the register */ static void reset_cf(void) +{ + writel(SCU_COPRO_RESET, sysreg + SCU_COPRO_CTRL); + usleep(10); + writel(0, sysreg + SCU_COPRO_CTRL); +} + +/* Write SCU_COPRO_CLK_EN alone to the coprocessor control register */ static void start_cf(void) +{ + writel(SCU_COPRO_CLK_EN, sysreg + SCU_COPRO_CTRL); +} + +/* Copy the bytes between cf_code_start and cf_code_end to the start of cfmem, one writeb() per byte */ static void load_cf_code(void) +{ + extern uint8_t cf_code_start, cf_code_end; + + uint8_t *code = &cf_code_start; + uint8_t *mem = cfmem; + + while(code < &cf_code_end) + writeb(*(code++), mem++); +} + +/* Clear the AA and R source bits in both CMDSRC0 and CMDSRC1 (read-modify-write of each register) */ static void gpio_source_arm(void) +{ + uint32_t val; + + /* ARM = 00 */ + val = readl(sysreg + GPIO_YZAAAB_CMDSRC0); + val &= ~GPIO_AA_SRC_BIT; + writel(val, sysreg + GPIO_YZAAAB_CMDSRC0); + val = readl(sysreg + GPIO_YZAAAB_CMDSRC1); + val &= ~GPIO_AA_SRC_BIT; + writel(val, sysreg + GPIO_YZAAAB_CMDSRC1); + + val = readl(sysreg + GPIO_QRST_CMDSRC0); + val &= ~GPIO_R_SRC_BIT; + writel(val, sysreg + GPIO_QRST_CMDSRC0); + val = readl(sysreg + GPIO_QRST_CMDSRC1); + val &= ~GPIO_R_SRC_BIT; + writel(val, sysreg + GPIO_QRST_CMDSRC1); +} + +/* Clear the AA and R source bits in CMDSRC0 and set them in CMDSRC1 */ static void gpio_source_cf(void) +{ + uint32_t val; + + /* CF = 10 */ + val = readl(sysreg + GPIO_YZAAAB_CMDSRC0); + val &= ~GPIO_AA_SRC_BIT; + writel(val, sysreg + GPIO_YZAAAB_CMDSRC0); + val = readl(sysreg + GPIO_YZAAAB_CMDSRC1); + val |= GPIO_AA_SRC_BIT; + writel(val, sysreg + GPIO_YZAAAB_CMDSRC1); + + val = readl(sysreg + GPIO_QRST_CMDSRC0); + val &= ~GPIO_R_SRC_BIT; + writel(val, sysreg + 
GPIO_QRST_CMDSRC0);
+	val = readl(sysreg + GPIO_QRST_CMDSRC1);
+	val |= GPIO_R_SRC_BIT;
+	writel(val, sysreg + GPIO_QRST_CMDSRC1);
+}
+
+static const uint8_t crc4_tab[] = {
+	0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2,
+	0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3,
+};
+
+/**
+ * crc4 - calculate the 4-bit crc of a value.
+ * @c:    starting crc4
+ * @x:    value to checksum
+ * @bits: number of bits in @x to checksum
+ *
+ * Returns the crc4 value of @x, using polynomial 0b10111.
+ *
+ * Only the low @bits bits of @x are used; bits above that are ignored.
+ * The bit count is rounded up to a whole number of nibbles, which are
+ * processed starting from the most significant one.
+ */
+static uint8_t crc4(uint8_t c, uint64_t x, int bits)
+{
+	int i;
+
+	/* mask off anything above the top bit (shifting by 64 is undefined) */
+	if (bits < 64)
+		x &= (1ull << bits) - 1;
+
+	/* Align to 4-bits */
+	bits = (bits + 3) & ~0x3;
+
+	/* Calculate crc4 over four-bit nibbles, starting at the MSbit */
+	for (i = bits - 4; i >= 0; i -= 4)
+		c = crc4_tab[c ^ ((x >> i) & 0xf)];
+
+	return c;
+}
+
+/* A message under construction: @bits valid bits, right aligned in @msg */
+struct fsi_gpio_msg {
+	uint64_t	msg;
+	uint8_t		bits;
+};
+
+/* Append the low @bits bits of @data to the message (@bits must be < 64) */
+static void msg_push_bits(struct fsi_gpio_msg *msg, uint64_t data, int bits)
+{
+	msg->msg <<= bits;
+	msg->msg |= data & ((1ull << bits) - 1);
+	msg->bits += bits;
+}
+
+/*
+ * Append the 4-bit CRC of the message. The CRC also covers a leading
+ * start bit (1) which is not stored in the message itself.
+ */
+static void msg_push_crc(struct fsi_gpio_msg *msg)
+{
+	uint8_t crc;
+	int top;
+
+	top = msg->bits & 0x3;
+
+	/* start bit, and any non-aligned top bits */
+	crc = crc4(0, 1 << top | msg->msg >> (msg->bits - top), top + 1);
+
+	/* aligned bits */
+	crc = crc4(crc, msg->msg, msg->bits - top);
+
+	msg_push_bits(msg, crc, 4);
+}
+
+/* True when @id/@addr fall in the same word as the last recorded access */
+static bool check_same_address(int id, uint32_t addr)
+{
+	/* this will also handle LAST_ADDR_INVALID */
+	return last_addr == (((id & 0x3) << 21) | (addr & ~0x3));
+}
+
+/*
+ * Check whether @addr can be reached with a relative command from the
+ * last recorded address. On success the signed offset (-256..255) is
+ * stored in *@rel_addrp and true is returned.
+ */
+static bool check_relative_address(int id, uint32_t addr, uint32_t *rel_addrp)
+{
+	/*
+	 * Work on a copy of the global. This used to be declared as
+	 * "uint32_t last_addr = last_addr;", which shadows the global and
+	 * initialises the local from itself, i.e. from garbage.
+	 */
+	uint32_t prev_addr = last_addr;
+	int32_t rel_addr;
+
+	if (prev_addr == LAST_ADDR_INVALID)
+		return false;
+
+	/* We may be in 23-bit addressing mode, which uses the id as the
+	 * top two address bits. So, if we're referencing a different ID,
+	 * use absolute addresses.
+	 */
+	if (((prev_addr >> 21) & 0x3) != (uint32_t)id)
+		return false;
+
+	/* remove the top two bits from any 23-bit addressing */
+	prev_addr &= (1 << 21) - 1;
+
+	/* We know that the addresses are limited to 21 bits, so this won't
+	 * overflow the signed rel_addr */
+	rel_addr = addr - prev_addr;
+	if (rel_addr > 255 || rel_addr < -256)
+		return false;
+
+	*rel_addrp = (uint32_t)rel_addr;
+
+	return true;
+}
+
+/* Record the last accessed slave id/word address, or invalidate the record */
+static void last_address_update(int id, bool valid, uint32_t addr)
+{
+	if (!valid)
+		last_addr = LAST_ADDR_INVALID;
+	else
+		last_addr = ((id & 0x3) << 21) | (addr & ~0x3);
+}
+
+/*
+ * Build an address-read/write command for slave @id into @cmd, ending
+ * with its CRC. A NULL @data means read; otherwise @size bytes from
+ * @data are appended as write data. The shortest addressing form
+ * (same / relative / absolute) allowed by the last recorded address
+ * is used.
+ */
+static void build_ar_command(struct fsi_gpio_msg *cmd, uint8_t id,
+		uint32_t addr, size_t size, const void *data)
+{
+	int i, addr_bits, opcode_bits;
+	bool write = !!data;
+	uint8_t ds, opcode;
+	uint32_t rel_addr;
+
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	/* we have 21 bits of address max */
+	addr &= ((1 << 21) - 1);
+
+	/* cmd opcodes are variable length - SAME_AR is only two bits */
+	opcode_bits = 3;
+
+	if (check_same_address(id, addr)) {
+		/* we still address the byte offset within the word */
+		addr_bits = 2;
+		opcode_bits = 2;
+		opcode = FSI_GPIO_CMD_SAME_AR;
+
+	} else if (check_relative_address(id, addr, &rel_addr)) {
+		/* 8 bits plus sign */
+		addr_bits = 9;
+		addr = rel_addr;
+		opcode = FSI_GPIO_CMD_REL_AR;
+
+	} else {
+		addr_bits = 21;
+		opcode = FSI_GPIO_CMD_ABS_AR;
+	}
+
+	/*
+	 * The read/write size is encoded in the lower bits of the address
+	 * (as it must be naturally-aligned), and the following ds bit.
+	 *
+	 *	size	addr:1	addr:0	ds
+	 *	1	x	x	0
+	 *	2	x	0	1
+	 *	4	0	1	1
+	 *
+	 */
+	ds = size > 1 ? 1 : 0;
+	addr &= ~(size - 1);
+	if (size == 4)
+		addr |= 1;
+
+	msg_push_bits(cmd, id, 2);
+	msg_push_bits(cmd, opcode, opcode_bits);
+	msg_push_bits(cmd, write ? 
0 : 1, 1); + msg_push_bits(cmd, addr, addr_bits); + msg_push_bits(cmd, ds, 1); + for (i = 0; write && i < size; i++) + msg_push_bits(cmd, ((uint8_t *)data)[i], 8); + + msg_push_crc(cmd); +} + +void test_read(uint32_t addr) +{ + struct fsi_gpio_msg cmd; + uint32_t op, resp, crc; + uint8_t stat, rtag, rcrc; + int i; + + build_ar_command(&cmd, 0, addr, 4, NULL); + + /* Left align message */ + cmd.msg <<= (64 - cmd.bits); + + printf("msg: %d bits 0x%016llx\n", cmd.bits, cmd.msg); + + /* Store message into SRAM */ + // byteswap ? */ + writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA); + writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4); + + op = CMD_COMMAND; + op |= cmd.bits << CMD_REG_CLEN_SHIFT; + op |= 32 << CMD_REG_RLEN_SHIFT; + writel(htonl(op), sysreg + SRAM_BASE + CMD_REG); + + /* Ring doorbell */ + writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG); + + do { + stat = readb(sysreg + SRAM_BASE + STAT_REG); + printf(" stat=%02x\n", stat); + } while(stat < STAT_COMPLETE || stat == 0xff); + + resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA)); + rtag = readb(sysreg + SRAM_BASE + STAT_RTAG); + rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC); + writeb(0, sysreg + SRAM_BASE + STAT_REG); + + printf("CMD=%08x STAT=%02x RTAG=%02x, RCRC=%02x, RDATA=%08x\n", + ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), + stat, rtag, rcrc, resp); + + /* we have a whole message now; check CRC */ + crc = crc4(0, 1, 1); + crc = crc4(crc, resp, 32); + crc = crc4(crc, rcrc, 4); + if (crc) + printf("CRC ok !\n"); + else + printf("BAD CRC !!!\n"); + + for (i = 0; i < 256; i++) { + printf("%02x ", readb(sysreg + SRAM_BASE + TRACEBUF + i)); + if ((i % 16) == 15) + printf("\n"); + } + printf("\n"); +} + +static void dump_stuff(void) +{ + int i; + + printf("CMD:%08x STAT:%02x INT: %08x\n", + ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), + readb(sysreg + SRAM_BASE + STAT_REG), + ntohl(readl(sysreg + SRAM_BASE + INT_CNT))); + + for (i = 0; i < 256; i++) { + printf("%02x ", 
readb(sysreg + SRAM_BASE + TRACEBUF + i)); + if ((i % 16) == 15) + printf("\n"); + } +} + +int test_read_fast(uint32_t addr, uint32_t *data) +{ + struct fsi_gpio_msg cmd; + uint32_t op, resp, crc; + uint8_t stat, rtag, rcrc; + uint32_t timeout = 100000; + + build_ar_command(&cmd, 0, addr, 4, NULL); + + /* Left align message */ + cmd.msg <<= (64 - cmd.bits); + + /* Store message into SRAM */ + writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA); + writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4); + + op = CMD_COMMAND; + op |= cmd.bits << CMD_REG_CLEN_SHIFT; + op |= 32 << CMD_REG_RLEN_SHIFT; + writel(htonl(op), sysreg + SRAM_BASE + CMD_REG); + + /* Ring doorbell */ + writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG); + + do { + if (timeout-- == 0) { + printf("Timeout !\n"); + + dump_stuff(); + return -ETIMEDOUT; + } + stat = readb(sysreg + SRAM_BASE + STAT_REG); + } while(stat < STAT_COMPLETE || stat == 0xff); + + resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA)); + rtag = readb(sysreg + SRAM_BASE + STAT_RTAG); + rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC); + + /* Clear status reg */ + writeb(0, sysreg + SRAM_BASE + STAT_REG); + + /* we have a whole message now; check CRC */ + crc = crc4(0, 1, 1); + crc = crc4(crc, resp, 32); + crc = crc4(crc, rcrc, 4); + if (!crc) { + last_address_update(0, false, 0); + printf("BAD CRC !!!\n"); + return -ETIMEDOUT; + } + if (rtag & 3) { + printf("FSI error 0x%x\n", rtag & 3); + last_address_update(0, false, 0); + return -EIO; + } + last_address_update(0, true, addr); + *data = resp; + return 0; +} + +void bench(void) +{ + struct timespec t0, t1; + uint32_t val, orig; + uint64_t tns0, tns1; + int i, rc; + + printf("Bench...\n"); + rc = test_read_fast(0, &orig); + if (rc) + return; + clock_gettime(CLOCK_MONOTONIC, &t0); + for (i = 0; i < (0x100000 / 4); i++) { + rc = test_read_fast(0, &val); + if (rc) { + printf("Failed after %d iterations\n", i); + break; + } + if (val != orig) { + printf("mismatch ! 
%08x vs. %08x\n", val, orig); + break; + } + } + printf("\n"); + clock_gettime(CLOCK_MONOTONIC, &t1); + tns0 = t0.tv_sec * 1000000000ull + t0.tv_nsec; + tns1 = t1.tv_sec * 1000000000ull + t1.tv_nsec; + fprintf(stderr, "Spent: %lld ms\n", (tns1 - tns0) / 1000000); +} + + +int main(int argc, char *argv[]) +{ + uint32_t val; + + open_mem(); + + printf("Resetting ColdFire...\n"); + reset_cf(); + + printf("Setting up and starting ColdFire...\n"); + + setup_cf_maps(); + + load_cf_code(); + + gpio_source_cf(); + + /* Mark command invalid and clear sram */ + // writel(0xffffffff, sysreg + SRAM_BASE + CMD_REG); + memset(sysreg + SRAM_BASE, 0xff, 0x1000); + + /* Start ColdFire */ + start_cf(); + + /* Wait for ack */ + do { + val = readl(sysreg + SRAM_BASE + CMD_REG); + } while (val == 0xffffffff); + + printf("CMD:%08x STAT:%02x INT: %08x\n", + ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), + readb(sysreg + SRAM_BASE + STAT_REG), + ntohl(readl(sysreg + SRAM_BASE + INT_CNT))); + + /* Enable interrupt */ + writel(0x2, sysreg + CVIC_BASE + CVIC_EN_REG); + + last_address_update(0, false, 0); + + /* Test read */ + test_read(0); + test_read(4); + + bench(); + + gpio_source_arm(); + + return 0; +} + + diff --git a/cf-wrapper.S b/cf-wrapper.S new file mode 100644 index 0000000..9b8f5cf --- /dev/null +++ b/cf-wrapper.S @@ -0,0 +1,9 @@ + .data + .align 8 + .globl cf_code_start +cf_code_start: + .incbin "cf-code.bin" + .align 8 + .globl cf_code_end +cf_code_end: + .previous |