#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \ : : "r" (0) : "memory") static inline uint8_t readb(void *addr) { dsb(); return *(volatile uint8_t *)addr; } static inline uint16_t readw(void *addr) { dsb(); return *(volatile uint16_t *)addr; } static inline uint32_t readl(void *addr) { dsb(); return *(volatile uint32_t *)addr; } static inline void writeb(uint8_t val, void *addr) { dsb(); *(volatile uint8_t *)addr = val; } static inline void writew(uint16_t val, void *addr) { dsb(); *(volatile uint16_t *)addr = val; } static inline void writel(uint32_t val, void *addr) { dsb(); *(volatile uint32_t *)addr = val; } static inline void writeq(uint64_t val, void *addr) { dsb(); *(volatile uint64_t *)addr = val; } #define SCU_REGS 0x000e2000 /* 1e6e2000 */ #define SCU_COPRO_CTRL (SCU_REGS + 0x100) #define SCU_COPRO_RESET 0x00000002 #define SCU_COPRO_CLK_EN 0x00000001 #define SCU_COPRO_SEG0 (SCU_REGS + 0x104) /* 1M */ #define SCU_COPRO_SEG1 (SCU_REGS + 0x108) /* 1M */ #define SCU_COPRO_SEG2 (SCU_REGS + 0x10c) /* 1M */ #define SCU_COPRO_SEG3 (SCU_REGS + 0x110) /* 1M */ #define SCU_COPRO_SEG4 (SCU_REGS + 0x114) /* 1M */ #define SCU_COPRO_SEG5 (SCU_REGS + 0x118) /* 1M */ #define SCU_COPRO_SEG6 (SCU_REGS + 0x11c) /* 1M */ #define SCU_COPRO_SEG7 (SCU_REGS + 0x120) /* 1M */ #define SCU_COPRO_SEG8 (SCU_REGS + 0x124) /* 8M */ #define SCU_COPRO_SEG_SWAP 0x00000001 #define SCU_COPRO_CACHE_CTL (SCU_REGS + 0x128) #define SCU_COPRO_CACHE_EN 0x00000001 #define SCU_COPRO_SEG0_CACHE_EN 0x00000002 #define SCU_COPRO_SEG1_CACHE_EN 0x00000004 #define SCU_COPRO_SEG2_CACHE_EN 0x00000008 #define SCU_COPRO_SEG3_CACHE_EN 0x00000010 #define SCU_COPRO_SEG4_CACHE_EN 0x00000020 #define SCU_COPRO_SEG5_CACHE_EN 0x00000040 #define SCU_COPRO_SEG6_CACHE_EN 0x00000080 #define SCU_COPRO_SEG7_CACHE_EN 0x00000100 #define SCU_COPRO_SEG8_CACHE_EN 0x00000200 #define COPRO_ICACHE_FLUSH_REG 0x00008000 #define COPRO_DCACHE_FLUSH_REG 0x00008004 #define SRAM_BASE 0x00120000 /* 1e720000 - actually 36K */ #define SRAM_SIZE 0x00008000 #define GPIO_REGS 0x00180000 /* 1e780000 */ #define GPIO_YZAAAB_CMDSRC0 (GPIO_REGS + 0x170) #define GPIO_YZAAAB_CMDSRC1 (GPIO_REGS + 0x174) #define GPIO_QRST_CMDSRC0 (GPIO_REGS + 0x110) #define GPIO_QRST_CMDSRC1 (GPIO_REGS + 0x114) #define GPIO_AA_SRC_BIT 0x00010000 #define GPIO_R_SRC_BIT 0x00000100 #define CVIC_BASE 0x000c2000 #define CVIC_EN_REG 0x10 #define CVIC_TRIG_REG 0x18 static void *sysreg; #define SYSREG_BASE 0x1e600000 /* System registers */ #define SYSREG_SIZE 0x00200000 /* 2M*/ static void *cfmem; #define CFMEM_BASE 0x9ef00000 /* Reserved memory */ #define CFMEM_SIZE 0x00100000 /* 1M */ #define CMD_REG 0x00 #define CMD_REG_CMD_MASK 0x000000ff #define CMD_REG_CMD_SHIFT 0 #define CMD_NONE 0x00 #define CMD_COMMAND 0x01 #define CMD_BREAK 0x02 #define CMD_INVALID 0xff #define CMD_REG_CLEN_MASK 0x0000ff00 #define CMD_REG_CLEN_SHIFT 8 #define CMD_REG_RLEN_MASK 0x00ff0000 #define CMD_REG_RLEN_SHIFT 16 #define STAT_REG 0x04 #define STAT_STOPPED 0x00 #define STAT_SENDING 0x01 #define STAT_COMPLETE 0x02 #define STAT_ERR_INVAL_CMD 0x80 #define STAT_ERR_INVAL_IRQ 0x81 #define STAT_ERR_MTOE 0x83 #define STAT_RTAG 0x05 #define STAT_RCRC 0x06 #define CMD_DATA 0x10 /* 64 bit of data left aligned */ #define RSP_DATA 0x20 /* 32 bit of data right aligned */ #define INT_CNT 0x30 /* debug: interrupt count */ #define BAD_INT_VEC 0x34 /* debug: vector of bad interrupt */ #define TRACEBUF 0x40 #define TR_CLK0START 0x01 #define TR_CLKOBIT0 0x02 #define TR_CLKOBIT1 0x03 #define TR_CLKZ 0x04 /* + # */ #define TR_CLKWSTART 0x05 /* + bit */ #define FSI_GPIO_CMD_DPOLL 0x2 #define FSI_GPIO_CMD_EPOLL 0x3 #define FSI_GPIO_CMD_TERM 0x3f #define FSI_GPIO_CMD_ABS_AR 0x4 #define FSI_GPIO_CMD_REL_AR 0x5 #define FSI_GPIO_CMD_SAME_AR 0x3 /* but only a 2-bit opcode... */ #define LAST_ADDR_INVALID 0x1 uint32_t last_addr; static void open_mem(void) { int fd; fd = open("/dev/mem", O_RDWR | O_SYNC); if (fd < 0) { perror("can't open /dev/mem"); exit(1); } sysreg = mmap(0, SYSREG_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, SYSREG_BASE); if (sysreg == MAP_FAILED) { perror("can't map system registers via /dev/mem"); exit(1); } cfmem = mmap(0, CFMEM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, CFMEM_BASE); if (cfmem == MAP_FAILED) { perror("can't map CF memory via /dev/mem"); exit(1); } } static void setup_cf_maps(void) { /* * Note about byteswap setting: the bus is wired backwards, * so setting the byteswap bit actually makes the ColdFire * work "normally" for a BE processor, ie, put the MSB in * the lowest address byte. * * We thus need to set the bit for our main memory which * contains our program code. We create two mappings for * the register, one with each setting. * * Segments 2 and 3 has a "swapped" mapping (BE) * and 6 and 7 have a non-swapped mapping (LE) which allows * us to avoid byteswapping register accesses since the * registers are all LE. */ /* Setup segment 0 to our memory region */ writel(CFMEM_BASE | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG0); /* Segments 2 and 3 to sysregs with byteswap (SRAM) */ writel(SYSREG_BASE | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG2); writel(SYSREG_BASE | 0x100000 | SCU_COPRO_SEG_SWAP, sysreg + SCU_COPRO_SEG3); /* And segment 6 and 7 to our registers */ writel(SYSREG_BASE, sysreg + SCU_COPRO_SEG6); writel(SYSREG_BASE | 0x100000, sysreg + SCU_COPRO_SEG7); /* Memory cachable, regs and SRAM not cachable */ writel(SCU_COPRO_SEG0_CACHE_EN | SCU_COPRO_CACHE_EN, sysreg + SCU_COPRO_CACHE_CTL); } static void reset_cf(void) { writel(SCU_COPRO_RESET, sysreg + SCU_COPRO_CTRL); usleep(10); writel(0, sysreg + SCU_COPRO_CTRL); } static void start_cf(void) { writel(SCU_COPRO_CLK_EN, sysreg + SCU_COPRO_CTRL); } static void load_cf_code(void) { extern uint8_t cf_code_start, cf_code_end; uint8_t *code = &cf_code_start; uint8_t *mem = cfmem; while(code < &cf_code_end) writeb(*(code++), mem++); } static void gpio_source_arm(void) { uint32_t val; /* ARM = 00 */ val = readl(sysreg + GPIO_YZAAAB_CMDSRC0); val &= ~GPIO_AA_SRC_BIT; writel(val, sysreg + GPIO_YZAAAB_CMDSRC0); val = readl(sysreg + GPIO_YZAAAB_CMDSRC1); val &= ~GPIO_AA_SRC_BIT; writel(val, sysreg + GPIO_YZAAAB_CMDSRC1); val = readl(sysreg + GPIO_QRST_CMDSRC0); val &= ~GPIO_R_SRC_BIT; writel(val, sysreg + GPIO_QRST_CMDSRC0); val = readl(sysreg + GPIO_QRST_CMDSRC1); val &= ~GPIO_R_SRC_BIT; writel(val, sysreg + GPIO_QRST_CMDSRC1); } static void gpio_source_cf(void) { uint32_t val; /* CF = 10 */ val = readl(sysreg + GPIO_YZAAAB_CMDSRC0); val &= ~GPIO_AA_SRC_BIT; writel(val, sysreg + GPIO_YZAAAB_CMDSRC0); val = readl(sysreg + GPIO_YZAAAB_CMDSRC1); val |= GPIO_AA_SRC_BIT; writel(val, sysreg + GPIO_YZAAAB_CMDSRC1); val = readl(sysreg + GPIO_QRST_CMDSRC0); val &= ~GPIO_R_SRC_BIT; writel(val, sysreg + GPIO_QRST_CMDSRC0); val = readl(sysreg + GPIO_QRST_CMDSRC1); val |= GPIO_R_SRC_BIT; writel(val, sysreg + GPIO_QRST_CMDSRC1); } static const uint8_t crc4_tab[] = { 0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2, 0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3, }; /** * crc4 - calculate the 4-bit crc of a value. * @crc: starting crc4 * @x: value to checksum * @bits: number of bits in @x to checksum * * Returns the crc4 value of @x, using polynomial 0b10111. * * The @x value is treated as left-aligned, and bits above @bits are ignored * in the crc calculations. */ static uint8_t crc4(uint8_t c, uint64_t x, int bits) { int i; /* mask off anything above the top bit */ x &= (1ull << bits) - 1; /* Align to 4-bits */ bits = (bits + 3) & ~0x3; /* Calculate crc4 over four-bit nibbles, starting at the MSbit */ for (i = bits - 4; i >= 0; i -= 4) c = crc4_tab[c ^ ((x >> i) & 0xf)]; return c; } struct fsi_gpio_msg { uint64_t msg; uint8_t bits; }; static void msg_push_bits(struct fsi_gpio_msg *msg, uint64_t data, int bits) { msg->msg <<= bits; msg->msg |= data & ((1ull << bits) - 1); msg->bits += bits; } static void msg_push_crc(struct fsi_gpio_msg *msg) { uint8_t crc; int top; top = msg->bits & 0x3; /* start bit, and any non-aligned top bits */ crc = crc4(0, 1 << top | msg->msg >> (msg->bits - top), top + 1); /* aligned bits */ crc = crc4(crc, msg->msg, msg->bits - top); msg_push_bits(msg, crc, 4); } static bool check_same_address(int id, uint32_t addr) { /* this will also handle LAST_ADDR_INVALID */ return last_addr == (((id & 0x3) << 21) | (addr & ~0x3)); } static bool check_relative_address(int id, uint32_t addr, uint32_t *rel_addrp) { uint32_t last_addr = last_addr; int32_t rel_addr; if (last_addr == LAST_ADDR_INVALID) return false; /* We may be in 23-bit addressing mode, which uses the id as the * top two address bits. So, if we're referencing a different ID, * use absolute addresses. */ if (((last_addr >> 21) & 0x3) != id) return false; /* remove the top two bits from any 23-bit addressing */ last_addr &= (1 << 21) - 1; /* We know that the addresses are limited to 21 bits, so this won't * overflow the signed rel_addr */ rel_addr = addr - last_addr; if (rel_addr > 255 || rel_addr < -256) return false; *rel_addrp = (uint32_t)rel_addr; return true; } static void last_address_update(int id, bool valid, uint32_t addr) { if (!valid) last_addr = LAST_ADDR_INVALID; else last_addr = ((id & 0x3) << 21) | (addr & ~0x3); } static void build_ar_command(struct fsi_gpio_msg *cmd, uint8_t id, uint32_t addr, size_t size, const void *data) { int i, addr_bits, opcode_bits; bool write = !!data; uint8_t ds, opcode; uint32_t rel_addr; cmd->bits = 0; cmd->msg = 0; /* we have 21 bits of address max */ addr &= ((1 << 21) - 1); /* cmd opcodes are variable length - SAME_AR is only two bits */ opcode_bits = 3; if (check_same_address(id, addr)) { /* we still address the byte offset within the word */ addr_bits = 2; opcode_bits = 2; opcode = FSI_GPIO_CMD_SAME_AR; } else if (check_relative_address(id, addr, &rel_addr)) { /* 8 bits plus sign */ addr_bits = 9; addr = rel_addr; opcode = FSI_GPIO_CMD_REL_AR; } else { addr_bits = 21; opcode = FSI_GPIO_CMD_ABS_AR; } /* * The read/write size is encoded in the lower bits of the address * (as it must be naturally-aligned), and the following ds bit. * * size addr:1 addr:0 ds * 1 x x 0 * 2 x 0 1 * 4 0 1 1 * */ ds = size > 1 ? 1 : 0; addr &= ~(size - 1); if (size == 4) addr |= 1; msg_push_bits(cmd, id, 2); msg_push_bits(cmd, opcode, opcode_bits); msg_push_bits(cmd, write ? 0 : 1, 1); msg_push_bits(cmd, addr, addr_bits); msg_push_bits(cmd, ds, 1); for (i = 0; write && i < size; i++) msg_push_bits(cmd, ((uint8_t *)data)[i], 8); msg_push_crc(cmd); } void test_read(uint32_t addr) { struct fsi_gpio_msg cmd; uint32_t op, resp, crc; uint8_t stat, rtag, rcrc; int i; build_ar_command(&cmd, 0, addr, 4, NULL); /* Left align message */ cmd.msg <<= (64 - cmd.bits); printf("msg: %d bits 0x%016llx\n", cmd.bits, cmd.msg); /* Store message into SRAM */ // byteswap ? */ writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA); writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4); op = CMD_COMMAND; op |= cmd.bits << CMD_REG_CLEN_SHIFT; op |= 32 << CMD_REG_RLEN_SHIFT; writel(htonl(op), sysreg + SRAM_BASE + CMD_REG); /* Ring doorbell */ writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG); do { stat = readb(sysreg + SRAM_BASE + STAT_REG); printf(" stat=%02x\n", stat); } while(stat < STAT_COMPLETE || stat == 0xff); resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA)); rtag = readb(sysreg + SRAM_BASE + STAT_RTAG); rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC); writeb(0, sysreg + SRAM_BASE + STAT_REG); printf("CMD=%08x STAT=%02x RTAG=%02x, RCRC=%02x, RDATA=%08x\n", ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), stat, rtag, rcrc, resp); /* we have a whole message now; check CRC */ crc = crc4(0, 1, 1); crc = crc4(crc, resp, 32); crc = crc4(crc, rcrc, 4); if (crc) printf("CRC ok !\n"); else printf("BAD CRC !!!\n"); for (i = 0; i < 256; i++) { printf("%02x ", readb(sysreg + SRAM_BASE + TRACEBUF + i)); if ((i % 16) == 15) printf("\n"); } printf("\n"); } static void dump_stuff(void) { int i; printf("CMD:%08x STAT:%02x INT: %08x\n", ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), readb(sysreg + SRAM_BASE + STAT_REG), ntohl(readl(sysreg + SRAM_BASE + INT_CNT))); for (i = 0; i < 256; i++) { printf("%02x ", readb(sysreg + SRAM_BASE + TRACEBUF + i)); if ((i % 16) == 15) printf("\n"); } } int test_read_fast(uint32_t addr, uint32_t *data) { struct fsi_gpio_msg cmd; uint32_t op, resp, crc; uint8_t stat, rtag, rcrc; uint32_t timeout = 100000; build_ar_command(&cmd, 0, addr, 4, NULL); /* Left align message */ cmd.msg <<= (64 - cmd.bits); /* Store message into SRAM */ writel(htonl(cmd.msg >> 32), sysreg + SRAM_BASE + CMD_DATA); writel(htonl(cmd.msg & 0xffffffff), sysreg + SRAM_BASE + CMD_DATA + 4); op = CMD_COMMAND; op |= cmd.bits << CMD_REG_CLEN_SHIFT; op |= 32 << CMD_REG_RLEN_SHIFT; writel(htonl(op), sysreg + SRAM_BASE + CMD_REG); /* Ring doorbell */ writel(0x2, sysreg + CVIC_BASE + CVIC_TRIG_REG); do { if (timeout-- == 0) { printf("Timeout !\n"); dump_stuff(); return -ETIMEDOUT; } stat = readb(sysreg + SRAM_BASE + STAT_REG); } while(stat < STAT_COMPLETE || stat == 0xff); resp = ntohl(readl(sysreg + SRAM_BASE + RSP_DATA)); rtag = readb(sysreg + SRAM_BASE + STAT_RTAG); rcrc = readb(sysreg + SRAM_BASE + STAT_RCRC); /* Clear status reg */ writeb(0, sysreg + SRAM_BASE + STAT_REG); /* we have a whole message now; check CRC */ crc = crc4(0, 1, 1); crc = crc4(crc, resp, 32); crc = crc4(crc, rcrc, 4); if (!crc) { last_address_update(0, false, 0); printf("BAD CRC !!!\n"); return -ETIMEDOUT; } if (rtag & 3) { printf("FSI error 0x%x\n", rtag & 3); last_address_update(0, false, 0); return -EIO; } last_address_update(0, true, addr); *data = resp; return 0; } void bench(void) { struct timespec t0, t1; uint32_t val, orig; uint64_t tns0, tns1; int i, rc; printf("Bench...\n"); rc = test_read_fast(0, &orig); if (rc) return; clock_gettime(CLOCK_MONOTONIC, &t0); for (i = 0; i < (0x100000 / 4); i++) { rc = test_read_fast(0, &val); if (rc) { printf("Failed after %d iterations\n", i); break; } if (val != orig) { printf("mismatch ! %08x vs. %08x\n", val, orig); break; } } printf("\n"); clock_gettime(CLOCK_MONOTONIC, &t1); tns0 = t0.tv_sec * 1000000000ull + t0.tv_nsec; tns1 = t1.tv_sec * 1000000000ull + t1.tv_nsec; fprintf(stderr, "Spent: %lld ms\n", (tns1 - tns0) / 1000000); } int main(int argc, char *argv[]) { uint32_t val; open_mem(); printf("Resetting ColdFire...\n"); reset_cf(); printf("Setting up and starting ColdFire...\n"); setup_cf_maps(); load_cf_code(); gpio_source_cf(); /* Mark command invalid and clear sram */ // writel(0xffffffff, sysreg + SRAM_BASE + CMD_REG); memset(sysreg + SRAM_BASE, 0xff, 0x1000); /* Start ColdFire */ start_cf(); /* Wait for ack */ do { val = readl(sysreg + SRAM_BASE + CMD_REG); } while (val == 0xffffffff); printf("CMD:%08x STAT:%02x INT: %08x\n", ntohl(readl(sysreg + SRAM_BASE + CMD_REG)), readb(sysreg + SRAM_BASE + STAT_REG), ntohl(readl(sysreg + SRAM_BASE + INT_CNT))); /* Enable interrupt */ writel(0x2, sysreg + CVIC_BASE + CVIC_EN_REG); last_address_update(0, false, 0); /* Test read */ test_read(0); test_read(4); bench(); gpio_source_arm(); return 0; }