diff options
Diffstat (limited to 'arch')
127 files changed, 4933 insertions, 761 deletions
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 2dbdf59258d9..f9d4e6b6d4bd 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -32,6 +32,7 @@ #define MAP_NONBLOCK 0x40000 /* do not block on IO */ #define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x100000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_SYNC 2 /* synchronous memory sync */ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 74504b154256..869080bedb89 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma, #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE extern void flush_kernel_dcache_page(struct page *); -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define flush_icache_user_range(vma,page,addr,len) \ flush_dcache_page(page) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 496667703693..ed8fd0d19a3e 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -22,12 +22,6 @@ #include <mach/memory.h> #endif -/* - * Allow for constants defined here to be used from assembly code - * by prepending the UL suffix only with actual C code compilation. 
- */ -#define UL(x) _AC(x, UL) - /* PAGE_OFFSET - the virtual address of the start of the kernel image */ #define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) diff --git a/arch/arm/mach-npcm/npcm7xx.c b/arch/arm/mach-npcm/npcm7xx.c index 5f7cd88103ef..c5f77d854c4f 100644 --- a/arch/arm/mach-npcm/npcm7xx.c +++ b/arch/arm/mach-npcm/npcm7xx.c @@ -17,4 +17,6 @@ static const char *const npcm7xx_dt_match[] = { DT_MACHINE_START(NPCM7XX_DT, "NPCM7XX Chip family") .atag_offset = 0x100, .dt_compat = npcm7xx_dt_match, + .l2c_aux_val = 0x0, + .l2c_aux_mask = ~0x0, MACHINE_END diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ada8eb206a90..8c398fedbbb6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) void __init dma_contiguous_remap(void) { int i; + + if (!dma_mmu_remap_num) + return; + + /* call flush_cache_all() since CMA area would be large enough */ + flush_cache_all(); for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; @@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void) flush_tlb_kernel_range(__phys_to_virt(start), __phys_to_virt(end)); - iotable_init(&map, 1); + /* + * All the memory in CMA region will be on ZONE_MOVABLE. + * If that zone is considered as highmem, the memory in CMA + * region is also considered as highmem even if it's + * physical address belong to lowmem. In this case, + * re-mapping isn't required. 
+ */ + if (!is_highmem_idx(ZONE_MOVABLE)) + iotable_init(&map, 1); } } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index eb1de66517d5..f866870db749 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -21,20 +21,20 @@ #define MIN_GAP (128*1024*1024UL) #define MAX_GAP ((TASK_SIZE)/6*5) -static int mmap_is_legacy(void) +static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; } -static unsigned long mmap_base(unsigned long rnd) +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; if (gap < MIN_GAP) gap = MIN_GAP; @@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7dfcec4700fe..0094c6653b06 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -140,10 +140,8 @@ static inline void __flush_icache_all(void) dsb(ish); } -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) do { } while (0) 
+#define flush_dcache_mmap_unlock(mapping) do { } while (0) /* * We don't appear to need to do anything here. In fact, if we did, we'd diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 50fa96a49792..49d99214f43c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -29,12 +29,6 @@ #include <asm/sizes.h> /* - * Allow for constants defined here to be used from assembly code - * by prepending the UL suffix only with actual C code compilation. - */ -#define UL(x) _AC(x, UL) - -/* * Size of the PCI I/O space. This must remain a power of two so that * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses. */ diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index decccffb03ca..842c8a5fcd53 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -38,12 +38,12 @@ #define MIN_GAP (SZ_128M) #define MAX_GAP (STACK_TOP/6*5) -static int mmap_is_legacy(void) +static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(unsigned long rnd) +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; /* Values close to RLIM_INFINITY can overflow. 
*/ @@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd) * This function, called very early during the creation of a new process VM * image, sets up which VM layout function to use: */ -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; @@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality bit is set, or * if the expected stack growth is unlimited: */ - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile index 6f6096ff05a4..6ab942e6c534 100644 --- a/arch/c6x/Makefile +++ b/arch/c6x/Makefile @@ -25,6 +25,7 @@ KBUILD_AFLAGS += -mbig-endian LINKFLAGS += -mbig-endian KBUILD_LDFLAGS += -mbig-endian LDFLAGS += -EB +CHECKFLAGS += -D_BIG_ENDIAN endif head-y := arch/c6x/kernel/head.o diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c index cff57764fcad..0f8fde494875 100644 --- a/arch/c6x/kernel/asm-offsets.c +++ b/arch/c6x/kernel/asm-offsets.c @@ -107,7 +107,6 @@ void foo(void) /* These would be unneccessary if we ran asm files * through the preprocessor. 
*/ - DEFINE(KTHREAD_SIZE, THREAD_SIZE); DEFINE(KTHREAD_SHIFT, THREAD_SHIFT); DEFINE(KTHREAD_START_SP, THREAD_START_SP); DEFINE(ENOSYS_, ENOSYS); diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c index e8b6cc6a7b5a..1ef04b5ab93f 100644 --- a/arch/c6x/platforms/plldata.c +++ b/arch/c6x/platforms/plldata.c @@ -19,6 +19,7 @@ #include <asm/clock.h> #include <asm/setup.h> +#include <asm/special_insns.h> #include <asm/irq.h> /* diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 114b93488193..5de871eb4a59 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus); struct vm_area_struct; -/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ -#define HAVE_PCI_MMAP 1 -#define arch_can_pci_mmap_io() 1 +/* Tell PCI code what kind of PCI resource mappings we support */ +#define HAVE_PCI_MMAP 1 +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_io() 1 extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t count); diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index e53b8532353c..db8b1fa83452 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -33,6 +33,8 @@ extern int mem_init_done; #define PAGE_KERNEL __pgprot(0) /* these mean nothing to non MMU */ #define pgprot_noncached(x) (x) +#define pgprot_writecombine pgprot_noncached +#define pgprot_device pgprot_noncached #define __swp_type(x) (0) #define __swp_offset(x) (0) diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index ae79e8638d50..161f9758c631 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev) } /* - * Platform support for /proc/bus/pci/X/Y mmap()s, - * modelled on the sparc64 implementation by Dave 
Miller. - * -- paulus. + * Platform support for /proc/bus/pci/X/Y mmap()s. */ -/* - * Adjust vm_pgoff of VMA such that it is the physical page offset - * corresponding to the 32-bit pci bus offset for DEV requested by the user. - * - * Basically, the user finds the base address for his device which he wishes - * to mmap. They read the 32-bit value from the config space base register, - * add whatever PAGE_SIZE multiple offset they wish, and feed this into the - * offset parameter of mmap on /proc/bus/pci/XXX for that device. - * - * Returns negative error code on failure, zero on success. - */ -static struct resource *__pci_mmap_make_offset(struct pci_dev *dev, - resource_size_t *offset, - enum pci_mmap_state mmap_state) +int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - unsigned long io_offset = 0; - int i, res_bit; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + resource_size_t ioaddr = pci_resource_start(pdev, bar); if (!hose) - return NULL; /* should never happen */ - - /* If memory, add on the PCI bridge address offset */ - if (mmap_state == pci_mmap_mem) { -#if 0 /* See comment in pci_resource_to_user() for why this is disabled */ - *offset += hose->pci_mem_offset; -#endif - res_bit = IORESOURCE_MEM; - } else { - io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; - *offset += io_offset; - res_bit = IORESOURCE_IO; - } - - /* - * Check that the offset requested corresponds to one of the - * resources of the device. - */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *rp = &dev->resource[i]; - int flags = rp->flags; + return -EINVAL; /* should never happen */ - /* treat ROM as memory (should be already) */ - if (i == PCI_ROM_RESOURCE) - flags |= IORESOURCE_MEM; - - /* Active and same type? */ - if ((flags & res_bit) == 0) - continue; - - /* In the range of this resource? 
*/ - if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end) - continue; - - /* found it! construct the final physical address */ - if (mmap_state == pci_mmap_io) - *offset += hose->io_base_phys - io_offset; - return rp; - } + /* Convert to an offset within this PCI controller */ + ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE; - return NULL; + vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT; + return 0; } /* @@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, return prot; } -/* - * Perform the actual remap of the pages for a PCI device mapping, as - * appropriate for this architecture. The region in the process to map - * is described by vm_start and vm_end members of VMA, the base physical - * address is found in vm_pgoff. - * The pci device structure is provided so that architectures may make mapping - * decisions on a per-device or per-bus basis. - * - * Returns a negative error code on failure, zero on success. - */ -int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - resource_size_t offset = - ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; - struct resource *rp; - int ret; - - rp = __pci_mmap_make_offset(dev, &offset, mmap_state); - if (rp == NULL) - return -EINVAL; - - vma->vm_pgoff = offset >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, vma->vm_page_prot); - - return ret; -} - /* This provides legacy IO read access on a bus */ int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) { diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 61e9a24297b7..225c95da23ce 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2029,6 +2029,7 @@ config CPU_MIPSR6 select CPU_HAS_RIXI select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE + select MIPS_CRC_SUPPORT select MIPS_SPRAM 
config EVA @@ -2502,6 +2503,9 @@ config MIPS_ASID_BITS config MIPS_ASID_BITS_VARIABLE bool +config MIPS_CRC_SUPPORT + bool + # # - Highmem only makes sense for the 32-bit kernel. # - The current highmem code will only work properly on physically indexed @@ -2850,8 +2854,7 @@ config CRASH_DUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" - default "0xffffffff84000000" if 64BIT - default "0x84000000" if 32BIT + default "0xffffffff84000000" depends on CRASH_DUMP help This gives the CKSEG0 or KSEG0 address where the kernel is loaded. diff --git a/arch/mips/Makefile b/arch/mips/Makefile index d1ca839c3981..5e9fce076ab6 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -222,6 +222,8 @@ xpa-cflags-y := $(mips-cflags) xpa-cflags-$(micromips-ase) += -mmicromips -Wa$(comma)-fatal-warnings toolchain-xpa := $(call cc-option-yn,$(xpa-cflags-y) -mxpa) cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA +toolchain-crc := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc) +cflags-$(toolchain-crc) += -DTOOLCHAIN_SUPPORTS_CRC # # Firmware support @@ -249,20 +251,12 @@ ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif -entry-noisa-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ - | grep "\bkernel_entry\b" | cut -f1 -d \ ) -ifdef CONFIG_CPU_MICROMIPS - # - # Set the ISA bit, since the kernel_entry symbol in the ELF will have it - # clear which would lead to images containing addresses which bootloaders may - # jump to as MIPS32 code. - # - entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \ - $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \ - $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y))))))))) -else - entry-y = $(entry-noisa-y) -endif +# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number. 
+entry-y = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \ + | sed -n '/^start address / { \ + s/^.* //; \ + s/0x\([0-7].......\)$$/0x00000000\1/; \ + s/0x\(........\)$$/0xffffffff\1/; p }') cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ @@ -330,6 +324,7 @@ libs-y += arch/mips/math-emu/ # See arch/mips/Kbuild for content of core part of the kernel core-y += arch/mips/ +drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ # suspend and hibernation support @@ -473,6 +468,21 @@ define archhelp echo echo ' {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">' echo + echo ' Where BOARDS is some subset of the following:' + for board in $(sort $(BOARDS)); do echo " $${board}"; done + echo + echo ' Specifically the following generic default configurations are' + echo ' supported:' + echo + $(foreach cfg,$(generic_defconfigs), + printf " %-24s - Build generic kernel for $(call describe_generic_defconfig,$(cfg))\n" $(cfg);) + echo + echo ' The following legacy default configurations have been converted to' + echo ' generic and can still be used:' + echo + $(foreach cfg,$(sort $(legacy_defconfigs)), + printf " %-24s - Build $($(cfg)-y)\n" $(cfg);) + echo echo ' Otherwise, the following default configurations are available:' endef @@ -507,6 +517,10 @@ endef $(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el)) $(eval $(call gen_generic_defconfigs,micro32,r2,eb el)) +define describe_generic_defconfig +$(subst 32r,MIPS32 r,$(subst 64r,MIPS64 r,$(subst el, little endian,$(patsubst %_defconfig,%,$(1))))) +endef + .PHONY: $(generic_defconfigs) $(generic_defconfigs): $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ @@ -543,14 +557,18 @@ generic_defconfig: # now that the boards have been converted to use the generic kernel they are # wrappers around the generic rules above. 
# -.PHONY: sead3_defconfig -sead3_defconfig: - $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3 +legacy_defconfigs += ocelot_defconfig +ocelot_defconfig-y := 32r2el_defconfig BOARDS=ocelot + +legacy_defconfigs += sead3_defconfig +sead3_defconfig-y := 32r2el_defconfig BOARDS=sead-3 + +legacy_defconfigs += sead3micro_defconfig +sead3micro_defconfig-y := micro32r2el_defconfig BOARDS=sead-3 -.PHONY: sead3micro_defconfig -sead3micro_defconfig: - $(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3 +legacy_defconfigs += xilfpga_defconfig +xilfpga_defconfig-y := 32r2el_defconfig BOARDS=xilfpga -.PHONY: xilfpga_defconfig -xilfpga_defconfig: - $(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=xilfpga +.PHONY: $(legacy_defconfigs) +$(legacy_defconfigs): + $(Q)$(MAKE) -f $(srctree)/Makefile $($@-y) diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c index 328d697e72b4..4e79dbd54a33 100644 --- a/arch/mips/alchemy/board-gpr.c +++ b/arch/mips/alchemy/board-gpr.c @@ -190,7 +190,7 @@ static struct platform_device gpr_mtd_device = { /* * LEDs */ -static struct gpio_led gpr_gpio_leds[] = { +static const struct gpio_led gpr_gpio_leds[] = { { /* green */ .name = "gpr:green", .gpio = 4, diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c index 85bb75669b0d..aab55aaf3d62 100644 --- a/arch/mips/alchemy/board-mtx1.c +++ b/arch/mips/alchemy/board-mtx1.c @@ -145,7 +145,7 @@ static struct platform_device mtx1_wdt = { .resource = mtx1_wdt_res, }; -static struct gpio_led default_leds[] = { +static const struct gpio_led default_leds[] = { { .name = "mtx1:green", .gpio = 211, diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index e1675c25d5d4..f09262e0a72f 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -346,7 +346,7 @@ static struct platform_device ar7_udc = { /***************************************************************************** * LEDs 
****************************************************************************/ -static struct gpio_led default_leds[] = { +static const struct gpio_led default_leds[] = { { .name = "status", .gpio = 8, @@ -354,12 +354,12 @@ static struct gpio_led default_leds[] = { }, }; -static struct gpio_led titan_leds[] = { +static const struct gpio_led titan_leds[] = { { .name = "status", .gpio = 8, .active_low = 1, }, { .name = "wifi", .gpio = 13, .active_low = 1, }, }; -static struct gpio_led dsl502t_leds[] = { +static const struct gpio_led dsl502t_leds[] = { { .name = "status", .gpio = 9, @@ -377,7 +377,7 @@ static struct gpio_led dsl502t_leds[] = { }, }; -static struct gpio_led dg834g_leds[] = { +static const struct gpio_led dg834g_leds[] = { { .name = "ppp", .gpio = 6, @@ -406,7 +406,7 @@ static struct gpio_led dg834g_leds[] = { }, }; -static struct gpio_led fb_sl_leds[] = { +static const struct gpio_led fb_sl_leds[] = { { .name = "1", .gpio = 7, @@ -433,7 +433,7 @@ static struct gpio_led fb_sl_leds[] = { }, }; -static struct gpio_led fb_fon_leds[] = { +static const struct gpio_led fb_fon_leds[] = { { .name = "1", .gpio = 8, @@ -459,7 +459,7 @@ static struct gpio_led fb_fon_leds[] = { }, }; -static struct gpio_led gt701_leds[] = { +static const struct gpio_led gt701_leds[] = { { .name = "inet:green", .gpio = 13, diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c index 88a8fb2bbc71..88d400d256c4 100644 --- a/arch/mips/bcm47xx/buttons.c +++ b/arch/mips/bcm47xx/buttons.c @@ -355,7 +355,7 @@ bcm47xx_buttons_luxul_xwr_600_v1[] = { static const struct gpio_keys_button bcm47xx_buttons_luxul_xwr_1750_v1[] = { - BCM47XX_GPIO_KEY(14, BTN_TASK), + BCM47XX_GPIO_KEY(14, KEY_RESTART), }; /* Microsoft */ diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c index 8307a8a02667..34a7b3fbdfd9 100644 --- a/arch/mips/bcm47xx/leds.c +++ b/arch/mips/bcm47xx/leds.c @@ -409,6 +409,12 @@ bcm47xx_leds_luxul_xap_1500_v1[] __initconst = { }; static const struct gpio_led 
+bcm47xx_leds_luxul_xap1500_v1_extra[] __initconst = { + BCM47XX_GPIO_LED(44, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF), + BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF), +}; + +static const struct gpio_led bcm47xx_leds_luxul_xbr_4400_v1[] __initconst = { BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF), BCM47XX_GPIO_LED_TRIGGER(15, "green", "status", 0, "timer"), @@ -435,6 +441,11 @@ bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = { BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF), }; +static const struct gpio_led +bcm47xx_leds_luxul_xwr1750_v1_extra[] __initconst = { + BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF), +}; + /* Microsoft */ static const struct gpio_led @@ -528,6 +539,12 @@ static struct gpio_led_platform_data bcm47xx_leds_pdata; bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds); \ } while (0) +static struct gpio_led_platform_data bcm47xx_leds_pdata_extra __initdata = {}; +#define bcm47xx_set_pdata_extra(dev_leds) do { \ + bcm47xx_leds_pdata_extra.leds = dev_leds; \ + bcm47xx_leds_pdata_extra.num_leds = ARRAY_SIZE(dev_leds); \ +} while (0) + void __init bcm47xx_leds_register(void) { enum bcm47xx_board board = bcm47xx_board_get(); @@ -705,6 +722,7 @@ void __init bcm47xx_leds_register(void) break; case BCM47XX_BOARD_LUXUL_XAP_1500_V1: bcm47xx_set_pdata(bcm47xx_leds_luxul_xap_1500_v1); + bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xap1500_v1_extra); break; case BCM47XX_BOARD_LUXUL_XBR_4400_V1: bcm47xx_set_pdata(bcm47xx_leds_luxul_xbr_4400_v1); @@ -717,6 +735,7 @@ void __init bcm47xx_leds_register(void) break; case BCM47XX_BOARD_LUXUL_XWR_1750_V1: bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1); + bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xwr1750_v1_extra); break; case BCM47XX_BOARD_MICROSOFT_MN700: @@ -760,4 +779,6 @@ void __init bcm47xx_leds_register(void) } gpio_led_register_device(-1, &bcm47xx_leds_pdata); + if (bcm47xx_leds_pdata_extra.num_leds) + gpio_led_register_device(0, 
&bcm47xx_leds_pdata_extra); } diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index e2c6f131c8eb..1e79cab8e269 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -4,6 +4,7 @@ subdir-y += cavium-octeon subdir-y += img subdir-y += ingenic subdir-y += lantiq +subdir-y += mscc subdir-y += mti subdir-y += netlogic subdir-y += ni diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi index 2f9ef565e5d0..5bf77b6fcceb 100644 --- a/arch/mips/boot/dts/brcm/bcm7125.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi @@ -198,6 +198,13 @@ status = "disabled"; }; + watchdog: watchdog@4067e8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4067e8 0x14>; + status = "disabled"; + }; + upg_gio: gpio@406700 { compatible = "brcm,brcmstb-gpio"; reg = <0x406700 0x80>; diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index 02e426fe6013..2afa0dada575 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -233,6 +233,13 @@ status = "disabled"; }; + watchdog: watchdog@4067e8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4067e8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408440 { compatible = "brcm,l2-intc"; reg = <0x408440 0x30>; @@ -243,6 +250,17 @@ brcm,irq-can-wake; }; + aon_ctrl: syscon@408000 { + compatible = "brcm,brcmstb-aon-ctrl"; + reg = <0x408000 0x100>, <0x408200 0x200>; + reg-names = "aon-ctrl", "aon-sram"; + }; + + timers: timer@4067c0 { + compatible = "brcm,brcmstb-timers"; + reg = <0x4067c0 0x40>; + }; + upg_gio: gpio@406700 { compatible = "brcm,brcmstb-gpio"; reg = <0x406700 0x60>; @@ -483,5 +501,49 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@408e80 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x408e80 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = 
"timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; + }; + + memory_controllers { + compatible = "simple-bus"; + ranges = <0x0 0x103b0000 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memory-controller@0 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x0 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 1089d6ebc841..6375fc77f389 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -217,6 +217,13 @@ status = "disabled"; }; + watchdog: watchdog@4066a8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4066a8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408240 { compatible = "brcm,l2-intc"; reg = <0x408240 0x30>; @@ -362,5 +369,15 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@408e80 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x408e80 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = "timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index 4b87ebec407a..a57cacea91cf 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -209,6 +209,13 @@ status = "disabled"; }; + watchdog: watchdog@4066a8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4066a8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408440 { 
compatible = "brcm,l2-intc"; reg = <0x408440 0x30>; @@ -219,6 +226,17 @@ brcm,irq-can-wake; }; + aon_ctrl: syscon@408000 { + compatible = "brcm,brcmstb-aon-ctrl"; + reg = <0x408000 0x100>, <0x408200 0x200>; + reg-names = "aon-ctrl", "aon-sram"; + }; + + timers: timer@406680 { + compatible = "brcm,brcmstb-timers"; + reg = <0x406680 0x40>; + }; + upg_gio: gpio@406500 { compatible = "brcm,brcmstb-gpio"; reg = <0x406500 0xa0>; @@ -402,5 +420,49 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@408e80 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x408e80 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = "timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; + }; + + memory_controllers { + compatible = "simple-bus"; + ranges = <0x0 0x103b0000 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memory-controller@0 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x0 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index ca657df34b6d..728b9e9f84b8 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -205,6 +205,13 @@ status = "disabled"; }; + watchdog: watchdog@4066a8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4066a8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408440 { compatible = "brcm,l2-intc"; reg = <0x408440 0x30>; @@ -215,6 +222,17 @@ brcm,irq-can-wake; }; + aon_ctrl: syscon@408000 { + compatible = 
"brcm,brcmstb-aon-ctrl"; + reg = <0x408000 0x100>, <0x408200 0x200>; + reg-names = "aon-ctrl", "aon-sram"; + }; + + timers: timer@406680 { + compatible = "brcm,brcmstb-timers"; + reg = <0x406680 0x40>; + }; + upg_gio: gpio@406500 { compatible = "brcm,brcmstb-gpio"; reg = <0x406500 0xa0>; @@ -398,5 +416,49 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@408e80 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x408e80 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = "timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; + }; + + memory_controllers { + compatible = "simple-bus"; + ranges = <0x0 0x103b0000 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memory-controller@0 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x0 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi index d262e11bc3f9..9540c27f12e7 100644 --- a/arch/mips/boot/dts/brcm/bcm7420.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi @@ -214,6 +214,13 @@ status = "disabled"; }; + watchdog: watchdog@4067e8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4067e8 0x14>; + status = "disabled"; + }; + upg_gio: gpio@406700 { compatible = "brcm,brcmstb-gpio"; reg = <0x406700 0x80>; diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index e4fb9b6e6dce..410e61ebaf9e 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -232,6 +232,13 @@ status = 
"disabled"; }; + watchdog: watchdog@4067e8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4067e8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408440 { compatible = "brcm,l2-intc"; reg = <0x408440 0x30>; @@ -242,6 +249,17 @@ brcm,irq-can-wake; }; + aon_ctrl: syscon@408000 { + compatible = "brcm,brcmstb-aon-ctrl"; + reg = <0x408000 0x100>, <0x408200 0x200>; + reg-names = "aon-ctrl", "aon-sram"; + }; + + timers: timer@4067c0 { + compatible = "brcm,brcmstb-timers"; + reg = <0x4067c0 0x40>; + }; + upg_gio: gpio@406700 { compatible = "brcm,brcmstb-gpio"; reg = <0x406700 0x80>; @@ -494,5 +512,76 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@409580 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x409580 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = "timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; + }; + + memory_controllers { + compatible = "simple-bus"; + ranges = <0x0 0x103b0000 0x1a000>; + #address-cells = <1>; + #size-cells = <1>; + + memory-controller@0 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x0 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; + + memory-controller@1 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x10000 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = 
<0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index 1484e8990e52..8398b7f68bf4 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -247,6 +247,13 @@ status = "disabled"; }; + watchdog: watchdog@4067e8 { + clocks = <&upg_clk>; + compatible = "brcm,bcm7038-wdt"; + reg = <0x4067e8 0x14>; + status = "disabled"; + }; + aon_pm_l2_intc: interrupt-controller@408440 { compatible = "brcm,l2-intc"; reg = <0x408440 0x30>; @@ -257,6 +264,17 @@ brcm,irq-can-wake; }; + aon_ctrl: syscon@408000 { + compatible = "brcm,brcmstb-aon-ctrl"; + reg = <0x408000 0x100>, <0x408200 0x200>; + reg-names = "aon-ctrl", "aon-sram"; + }; + + timers: timer@4067c0 { + compatible = "brcm,brcmstb-timers"; + reg = <0x4067c0 0x40>; + }; + upg_gio: gpio@406700 { compatible = "brcm,brcmstb-gpio"; reg = <0x406700 0x80>; @@ -509,5 +527,76 @@ interrupt-names = "mspi_done"; status = "disabled"; }; + + waketimer: waketimer@409580 { + compatible = "brcm,brcmstb-waketimer"; + reg = <0x409580 0x14>; + interrupts = <0x3>; + interrupt-parent = <&aon_pm_l2_intc>; + interrupt-names = "timer"; + clocks = <&upg_clk>; + status = "disabled"; + }; + }; + + memory_controllers { + compatible = "simple-bus"; + ranges = <0x0 0x103b0000 0x1a000>; + #address-cells = <1>; + #size-cells = <1>; + + memory-controller@0 { + compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x0 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; + + memory-controller@1 { + 
compatible = "brcm,brcmstb-memc", "simple-bus"; + ranges = <0x0 0x10000 0xa000>; + #address-cells = <1>; + #size-cells = <1>; + + memc-arb@1000 { + compatible = "brcm,brcmstb-memc-arb"; + reg = <0x1000 0x248>; + }; + + memc-ddr@2000 { + compatible = "brcm,brcmstb-memc-ddr"; + reg = <0x2000 0x300>; + }; + + ddr-phy@6000 { + compatible = "brcm,brcmstb-ddr-phy"; + reg = <0x6000 0xc8>; + }; + + shimphy@8000 { + compatible = "brcm,brcmstb-ddr-shimphy"; + reg = <0x8000 0x13c>; + }; + }; }; }; diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts index 7f59ea2ded6c..79e9769f7e00 100644 --- a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts @@ -50,6 +50,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + /* FIXME: USB is wonky; disable it for now */ &ehci0 { status = "disabled"; diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts index 9e7d5228f2b7..28370ff77eeb 100644 --- a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts @@ -59,6 +59,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -114,3 +118,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts index 708207a0002d..41c1b510c230 100644 --- a/arch/mips/boot/dts/brcm/bcm97358svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts @@ -55,6 +55,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -106,3 +110,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts index 73c6dc9c8c6d..9f6c6c9b7ea7 100644 --- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts @@ -50,6 +50,10 @@ status = "okay"; }; 
+&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -109,3 +113,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts index 37bacfdcf9d9..df8b755c390f 100644 --- a/arch/mips/boot/dts/brcm/bcm97362svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts @@ -47,6 +47,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -78,3 +82,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts index f96241e94874..086faeaa384a 100644 --- a/arch/mips/boot/dts/brcm/bcm97420c.dts +++ b/arch/mips/boot/dts/brcm/bcm97420c.dts @@ -60,6 +60,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + /* FIXME: MAC driver comes up but cannot attach to PHY */ &enet0 { status = "disabled"; diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts index ce762c7b2e54..0ed22217bf3a 100644 --- a/arch/mips/boot/dts/brcm/bcm97425svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts @@ -61,6 +61,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -144,3 +148,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts index d4dd31a543fd..2c145a883aef 100644 --- a/arch/mips/boot/dts/brcm/bcm97435svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts @@ -61,6 +61,10 @@ status = "okay"; }; +&watchdog { + status = "okay"; +}; + &enet0 { status = "okay"; }; @@ -120,3 +124,7 @@ &mspi { status = "okay"; }; + +&waketimer { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts index 2cd49b60e030..1bd105428f61 100644 --- a/arch/mips/boot/dts/img/boston.dts +++ 
b/arch/mips/boot/dts/img/boston.dts @@ -157,7 +157,7 @@ #address-cells = <1>; #size-cells = <0>; - rtc@0x68 { + rtc@68 { compatible = "st,m41t81s"; reg = <0x68>; }; diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index a4cc52214dbd..38078594cf97 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -110,22 +110,22 @@ reg = <0x0 0x0 0x0 0x800000>; }; - partition@0x800000 { + partition@800000 { label = "u-boot"; reg = <0x0 0x800000 0x0 0x200000>; }; - partition@0xa00000 { + partition@a00000 { label = "u-boot-env"; reg = <0x0 0xa00000 0x0 0x200000>; }; - partition@0xc00000 { + partition@c00000 { label = "boot"; reg = <0x0 0xc00000 0x0 0x4000000>; }; - partition@0x8c00000 { + partition@4c00000 { label = "system"; reg = <0x0 0x4c00000 0x1 0xfb400000>; }; diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile new file mode 100644 index 000000000000..c51164537c02 --- /dev/null +++ b/arch/mips/boot/dts/mscc/Makefile @@ -0,0 +1,3 @@ +dtb-$(CONFIG_LEGACY_BOARD_OCELOT) += ocelot_pcb123.dtb + +obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi new file mode 100644 index 000000000000..dd239cab2f9d --- /dev/null +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2017 Microsemi Corporation */ + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mscc,ocelot"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "mips,mips24KEc"; + device_type = "cpu"; + clocks = <&cpu_clk>; + reg = <0>; + }; + }; + + aliases { + serial0 = &uart0; + }; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + cpu_clk: cpu-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + 
clock-frequency = <500000000>; + }; + + ahb_clk: ahb-clk { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&cpu_clk>; + clock-div = <2>; + clock-mult = <1>; + }; + + ahb@70000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x70000000 0x2000000>; + + interrupt-parent = <&intc>; + + cpu_ctrl: syscon@0 { + compatible = "mscc,ocelot-cpu-syscon", "syscon"; + reg = <0x0 0x2c>; + }; + + intc: interrupt-controller@70 { + compatible = "mscc,ocelot-icpu-intr"; + reg = <0x70 0x70>; + #interrupt-cells = <1>; + interrupt-controller; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + uart0: serial@100000 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x100000 0x20>; + interrupts = <6>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + uart2: serial@100800 { + pinctrl-0 = <&uart2_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x100800 0x20>; + interrupts = <7>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + reset@1070008 { + compatible = "mscc,ocelot-chip-reset"; + reg = <0x1070008 0x4>; + }; + + gpio: pinctrl@1070034 { + compatible = "mscc,ocelot-pinctrl"; + reg = <0x1070034 0x68>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&gpio 0 0 22>; + + uart_pins: uart-pins { + pins = "GPIO_6", "GPIO_7"; + function = "uart"; + }; + + uart2_pins: uart2-pins { + pins = "GPIO_12", "GPIO_13"; + function = "uart2"; + }; + }; + }; +}; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts new file mode 100644 index 000000000000..29d6414f8886 --- /dev/null +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2017 Microsemi Corporation */ + +/dts-v1/; + +#include "ocelot.dtsi" + +/ { + compatible = 
"mscc,ocelot-pcb123", "mscc,ocelot"; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x0e000000>; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index d99f5242169e..b3aec101a65d 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, parent_irq = irq_of_parse_and_map(ciu_node, 0); if (!parent_irq) { - pr_err("ERROR: Couldn't acquire parent_irq for %s\n.", + pr_err("ERROR: Couldn't acquire parent_irq for %s\n", ciu_node->name); return -EINVAL; } @@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 0, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name); return -EINVAL; } host_data->raw_reg = (u64)phys_to_virt( @@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 1, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name); return -EINVAL; } host_data->en_reg = (u64)phys_to_virt( @@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, r = of_property_read_u32(ciu_node, "cavium,max-bits", &val); if (r) { - pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.", + pr_err("ERROR: Couldn't read cavium,max-bits from %s\n", ciu_node->name); return r; } @@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, &octeon_irq_domain_cib_ops, host_data); if (!cib_domain) { - pr_err("ERROR: Couldn't irq_domain_add_linear()\n."); + pr_err("ERROR: Couldn't 
irq_domain_add_linear()\n"); return -ENOMEM; } diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig index 3cefa6bc01dd..47aecb8750e6 100644 --- a/arch/mips/configs/bmips_stb_defconfig +++ b/arch/mips/configs/bmips_stb_defconfig @@ -72,6 +72,7 @@ CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y +CONFIG_SOC_BRCMSTB=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config index ca606e71f4d0..1a5d5ea4ab2b 100644 --- a/arch/mips/configs/generic/32r6.config +++ b/arch/mips/configs/generic/32r6.config @@ -1,2 +1,4 @@ CONFIG_CPU_MIPS32_R6=y CONFIG_HIGHMEM=y + +CONFIG_CRYPTO_CRC32_MIPS=y diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config index 7cac0339c4d5..5dd8e8503e34 100644 --- a/arch/mips/configs/generic/64r6.config +++ b/arch/mips/configs/generic/64r6.config @@ -2,3 +2,5 @@ CONFIG_CPU_MIPS64_R6=y CONFIG_64BIT=y CONFIG_MIPS32_O32=y CONFIG_MIPS32_N32=y + +CONFIG_CRYPTO_CRC32_MIPS=y diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config new file mode 100644 index 000000000000..aa815761d85e --- /dev/null +++ b/arch/mips/configs/generic/board-ocelot.config @@ -0,0 +1,35 @@ +# require CONFIG_CPU_MIPS32_R2=y + +CONFIG_LEGACY_BOARD_OCELOT=y + +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_PLATFORM=y +CONFIG_MTD_SPI_NOR=y +CONFIG_MTD_UBI=y + +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y + +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y + +CONFIG_GPIO_SYSFS=y + +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_MUX=y + +CONFIG_SPI=y +CONFIG_SPI_BITBANG=y +CONFIG_SPI_DESIGNWARE=y +CONFIG_SPI_SPIDEV=y + +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_OCELOT_RESET=y + +CONFIG_MAGIC_SYSRQ=y diff --git 
a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile new file mode 100644 index 000000000000..e07aca572c2e --- /dev/null +++ b/arch/mips/crypto/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for MIPS crypto files.. +# + +obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c new file mode 100644 index 000000000000..7d1d2425746f --- /dev/null +++ b/arch/mips/crypto/crc32-mips.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions + * + * Module based on arm64/crypto/crc32-arm.c + * + * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org> + * Copyright (C) 2018 MIPS Tech, LLC + */ + +#include <linux/unaligned/access_ok.h> +#include <linux/cpufeature.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> +#include <asm/mipsregs.h> + +#include <crypto/internal/hash.h> + +enum crc_op_size { + b, h, w, d, +}; + +enum crc_type { + crc32, + crc32c, +}; + +#ifndef TOOLCHAIN_SUPPORTS_CRC +#define _ASM_MACRO_CRC32(OP, SZ, TYPE) \ +_ASM_MACRO_3R(OP, rt, rs, rt2, \ + ".ifnc \\rt, \\rt2\n\t" \ + ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ + ".endif\n\t" \ + _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ + ((SZ) << 6) | ((TYPE) << 8)) \ + _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ + ((SZ) << 14) | ((TYPE) << 3))) +_ASM_MACRO_CRC32(crc32b, 0, 0); +_ASM_MACRO_CRC32(crc32h, 1, 0); +_ASM_MACRO_CRC32(crc32w, 2, 0); +_ASM_MACRO_CRC32(crc32d, 3, 0); +_ASM_MACRO_CRC32(crc32cb, 0, 1); +_ASM_MACRO_CRC32(crc32ch, 1, 1); +_ASM_MACRO_CRC32(crc32cw, 2, 1); +_ASM_MACRO_CRC32(crc32cd, 3, 1); +#define _ASM_SET_CRC "" +#else /* !TOOLCHAIN_SUPPORTS_CRC */ +#define _ASM_SET_CRC ".set\tcrc\n\t" +#endif + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + 
_ASM_SET_CRC \ + #type #size " %0, %1, %0\n\t" \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ +} while (0) + +#define CRC32(crc, value, size) \ + _CRC32(crc, value, size, crc32) + +#define CRC32C(crc, value, size) \ + _CRC32(crc, value, size, crc32c) + +static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + +#ifdef CONFIG_64BIT + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { +#else /* !CONFIG_64BIT */ + while (len >= sizeof(u32)) { +#endif + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} + +static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + +#ifdef CONFIG_64BIT + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { +#else /* !CONFIG_64BIT */ + while (len >= sizeof(u32)) { +#endif + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + return crc; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +struct chksum_ctx { + u32 key; +}; + +struct chksum_desc_ctx { + u32 crc; +}; + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + + return 0; +} + +/* + * Setting the seed allows arbitrary 
accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(tfm); + + if (keylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = get_unaligned_le32(key); + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32_mips_le_hw(ctx->crc, data, length); + return 0; +} + +static int chksumc_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(~ctx->crc, out); + return 0; +} + +static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out); + return 0; +} + +static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(ctx->crc, data, len, out); +} + +static int chksumc_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksumc_finup(ctx->crc, data, len, out); +} + +static int 
chksum_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup(mctx->key, data, length, out); +} + +static int chksumc_digest(struct shash_desc *desc, const u8 *data, + unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksumc_finup(mctx->key, data, length, out); +} + +static int chksum_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static struct shash_alg crc32_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-mips-hw", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksum_cra_init, + } +}; + +static struct shash_alg crc32c_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksumc_update, + .final = chksumc_final, + .finup = chksumc_finup, + .digest = chksumc_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-mips-hw", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksum_cra_init, + } +}; + +static int __init crc32_mod_init(void) +{ + int err; + + err = crypto_register_shash(&crc32_alg); + + if (err) + return err; + + err = crypto_register_shash(&crc32c_alg); + + if (err) { + crypto_unregister_shash(&crc32_alg); + return err; + } + + 
return 0; +} + +static void __exit crc32_mod_exit(void) +{ + crypto_unregister_shash(&crc32_alg); + crypto_unregister_shash(&crc32c_alg); +} + +MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com"); +MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions"); +MODULE_LICENSE("GPL v2"); + +module_cpu_feature_match(MIPS_CRC32, crc32_mod_init); +module_exit(crc32_mod_exit); diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 2ff3b17bfab1..ba9b2c8cce68 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -27,6 +27,22 @@ config LEGACY_BOARD_SEAD3 Enable this to include support for booting on MIPS SEAD-3 FPGA-based development boards, which boot using a legacy boot protocol. +comment "MSCC Ocelot doesn't work with SEAD3 enabled" + depends on LEGACY_BOARD_SEAD3 + +config LEGACY_BOARD_OCELOT + bool "Support MSCC Ocelot boards" + depends on LEGACY_BOARD_SEAD3=n + select LEGACY_BOARDS + select MSCC_OCELOT + +config MSCC_OCELOT + bool + select GPIOLIB + select MSCC_OCELOT_IRQ + select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 + comment "FIT/UHI Boards" config FIT_IMAGE_FDT_BOSTON diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile index 5c31e0c4697d..d03a36f869a4 100644 --- a/arch/mips/generic/Makefile +++ b/arch/mips/generic/Makefile @@ -14,5 +14,6 @@ obj-y += proc.o obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o +obj-$(CONFIG_LEGACY_BOARD_OCELOT) += board-ocelot.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c new file mode 100644 index 000000000000..06d92fb37769 --- /dev/null +++ b/arch/mips/generic/board-ocelot.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * Microsemi MIPS SoC support + * + * Copyright (c) 2017 Microsemi Corporation + */ +#include <asm/machine.h> +#include <asm/prom.h> + 
+#define DEVCPU_GCB_CHIP_REGS_CHIP_ID 0x71070000 +#define CHIP_ID_PART_ID GENMASK(27, 12) + +#define OCELOT_PART_ID (0x7514 << 12) + +#define UART_UART 0x70100000 + +static __init bool ocelot_detect(void) +{ + u32 rev; + int idx; + + /* Look for the TLB entry set up by redboot before trying to use it */ + write_c0_entryhi(DEVCPU_GCB_CHIP_REGS_CHIP_ID); + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + if (idx < 0) + return 0; + + /* A TLB entry exists, lets assume its usable and check the CHIP ID */ + rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID); + + if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID) + return 0; + + /* Copy command line from bootloader early for Initrd detection */ + if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) { + unsigned int prom_argc = fw_arg0; + const char **prom_argv = (const char **)fw_arg1; + + if (prom_argc > 1 && strlen(prom_argv[1]) > 0) + /* ignore all built-in args if any f/w args given */ + strcpy(arcs_cmdline, prom_argv[1]); + } + + return 1; +} + +static void __init ocelot_earlyprintk_init(void) +{ + void __iomem *uart_base; + + uart_base = ioremap_nocache(UART_UART, 0x20); + setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000); +} + +static void __init ocelot_late_init(void) +{ + ocelot_earlyprintk_init(); +} + +static __init const void *ocelot_fixup_fdt(const void *fdt, + const void *match_data) +{ + /* This has to be done so late because ioremap needs to work */ + late_time_init = ocelot_late_init; + + return fdt; +} + +extern char __dtb_ocelot_pcb123_begin[]; + +MIPS_MACHINE(ocelot) = { + .fdt = __dtb_ocelot_pcb123_begin, + .fixup_fdt = ocelot_fixup_fdt, + .detect = ocelot_detect, +}; diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 721b698bfe3c..5f74590e0bea 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -11,6 +11,7 @@ #include <asm/cpu.h> #include 
<asm/cpu-info.h> +#include <asm/isa-rev.h> #include <cpu-feature-overrides.h> /* @@ -493,7 +494,7 @@ # define cpu_has_perf (cpu_data[0].options & MIPS_CPU_PERF) #endif -#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6) +#if defined(CONFIG_SMP) && (MIPS_ISA_REV >= 6) /* * Some systems share FTLB RAMs between threads within a core (siblings in * kernel parlance). This means that FTLB entries may become invalid at almost @@ -525,7 +526,7 @@ # define cpu_has_shared_ftlb_entries \ (current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES) # endif -#endif /* SMP && __mips_isa_rev >= 6 */ +#endif /* SMP && MIPS_ISA_REV >= 6 */ #ifndef cpu_has_shared_ftlb_ram # define cpu_has_shared_ftlb_ram 0 diff --git a/arch/mips/include/asm/isa-rev.h b/arch/mips/include/asm/isa-rev.h new file mode 100644 index 000000000000..683ea3454dcb --- /dev/null +++ b/arch/mips/include/asm/isa-rev.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 MIPS Tech, LLC + * Author: Matt Redfearn <matt.redfearn@mips.com> + */ + +#ifndef __MIPS_ASM_ISA_REV_H__ +#define __MIPS_ASM_ISA_REV_H__ + +/* + * The ISA revision level. This is 0 for MIPS I to V and N for + * MIPS{32,64}rN. + */ + +/* If the compiler has defined __mips_isa_rev, believe it. 
*/ +#ifdef __mips_isa_rev +#define MIPS_ISA_REV __mips_isa_rev +#else +/* The compiler hasn't defined the isa rev so assume it's MIPS I - V (0) */ +#define MIPS_ISA_REV 0 +#endif + + +#endif /* __MIPS_ASM_ISA_REV_H__ */ diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index aa3800c82332..d99ca862dae3 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -167,7 +167,7 @@ #define AR71XX_AHB_DIV_MASK 0x7 #define AR724X_PLL_REG_CPU_CONFIG 0x00 -#define AR724X_PLL_REG_PCIE_CONFIG 0x18 +#define AR724X_PLL_REG_PCIE_CONFIG 0x10 #define AR724X_PLL_FB_SHIFT 0 #define AR724X_PLL_FB_MASK 0x3ff diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 858752dac337..f65859784a4c 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -664,6 +664,7 @@ #define MIPS_CONF5_FRE (_ULCAST_(1) << 8) #define MIPS_CONF5_UFE (_ULCAST_(1) << 9) #define MIPS_CONF5_CA2 (_ULCAST_(1) << 14) +#define MIPS_CONF5_CRCP (_ULCAST_(1) << 18) #define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) #define MIPS_CONF5_EVA (_ULCAST_(1) << 28) #define MIPS_CONF5_CV (_ULCAST_(1) << 29) diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index 600ad8fd6835..a2aba4b059e6 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -5,5 +5,6 @@ /* HWCAP flags */ #define HWCAP_MIPS_R6 (1 << 0) #define HWCAP_MIPS_MSA (1 << 1) +#define HWCAP_MIPS_CRC32 (1 << 2) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 606e02ca4b6c..3035ca499cd8 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -50,6 +50,7 @@ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 
0x80000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ /* * Flags for msync diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index cf3fd549e16d..6b07b739f914 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c) if (config5 & MIPS_CONF5_CA2) c->ases |= MIPS_ASE_MIPS16E2; + if (config5 & MIPS_CONF5_CRCP) + elf_hwcap |= HWCAP_MIPS_CRC32; + return config5 & MIPS_CONF_M; } diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 421e06dfee72..55c3fbeb2df6 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <linux/suspend.h> #include <asm/asm-offsets.h> #include <asm/cacheflush.h> @@ -670,6 +671,34 @@ static int cps_pm_online_cpu(unsigned int cpu) return 0; } +static int cps_pm_power_notifier(struct notifier_block *this, + unsigned long event, void *ptr) +{ + unsigned int stat; + + switch (event) { + case PM_SUSPEND_PREPARE: + stat = read_cpc_cl_stat_conf(); + /* + * If we're attempting to suspend the system and power down all + * of the cores, the JTAG detect bit indicates that the CPC will + * instead put the cores into clock-off state. In this state + * a connected debugger can cause the CPU to attempt + * interactions with the powered down system. At best this will + * fail. At worst, it can hang the NoC, requiring a hard reset. + * To avoid this, just block system suspend if a JTAG probe + * is detected. 
+ */ + if (stat & CPC_Cx_STAT_CONF_EJTAG_PROBE) { + pr_warn("JTAG probe is connected - abort suspend\n"); + return NOTIFY_BAD; + } + return NOTIFY_DONE; + default: + return NOTIFY_DONE; + } +} + static int __init cps_pm_init(void) { /* A CM is required for all non-coherent states */ @@ -705,6 +734,8 @@ static int __init cps_pm_init(void) pr_warn("pm-cps: no CPC, clock & power gating unavailable\n"); } + pm_notifier(cps_pm_power_notifier, 0); + return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online", cps_pm_online_cpu, NULL); } diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c index 7c746d3458e7..6288780b779e 100644 --- a/arch/mips/kernel/reset.c +++ b/arch/mips/kernel/reset.c @@ -13,6 +13,9 @@ #include <linux/reboot.h> #include <linux/delay.h> +#include <asm/compiler.h> +#include <asm/idle.h> +#include <asm/mipsregs.h> #include <asm/reboot.h> /* @@ -26,6 +29,62 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); +static void machine_hang(void) +{ + /* + * We're hanging the system so we don't want to be interrupted anymore. + * Any interrupt handlers that ran would at best be useless & at worst + * go awry because the system isn't in a functional state. + */ + local_irq_disable(); + + /* + * Mask all interrupts, giving us a better chance of remaining in the + * low power wait state. + */ + clear_c0_status(ST0_IM); + + while (true) { + if (cpu_has_mips_r) { + /* + * We know that the wait instruction is supported so + * make use of it directly, leaving interrupts + * disabled. + */ + asm volatile( + ".set push\n\t" + ".set " MIPS_ISA_ARCH_LEVEL "\n\t" + "wait\n\t" + ".set pop"); + } else if (cpu_wait) { + /* + * Try the cpu_wait() callback. This isn't ideal since + * it'll re-enable interrupts, but that ought to be + * harmless given that they're all masked. + */ + cpu_wait(); + local_irq_disable(); + } else { + /* + * We're going to burn some power running round the + * loop, but we don't really have a choice. 
This isn't + * a path we should expect to run for long during + * typical use anyway. + */ + } + + /* + * In most modern MIPS CPUs interrupts will cause the wait + * instruction to graduate even when disabled, and in some + * cases even when masked. In order to prevent a timer + * interrupt from continuously taking us out of the low power + * wait state, we clear any pending timer interrupt here. + */ + if (cpu_has_counter) + write_c0_compare(0); + } +} + void machine_restart(char *command) { if (_machine_restart) @@ -38,8 +97,7 @@ void machine_restart(char *command) do_kernel_restart(command); mdelay(1000); pr_emerg("Reboot failed -- System halted\n"); - local_irq_disable(); - while (1); + machine_hang(); } void machine_halt(void) @@ -51,8 +109,7 @@ void machine_halt(void) preempt_disable(); smp_send_stop(); #endif - local_irq_disable(); - while (1); + machine_hang(); } void machine_power_off(void) @@ -64,6 +121,5 @@ void machine_power_off(void) preempt_disable(); smp_send_stop(); #endif - local_irq_disable(); - while (1); + machine_hang(); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 5f8b0a9e30b3..563188ac6fa2 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -155,7 +155,8 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add add_memory_region(start, size, BOOT_MEM_RAM); } -bool __init memory_region_available(phys_addr_t start, phys_addr_t size) +static bool __init __maybe_unused memory_region_available(phys_addr_t start, + phys_addr_t size) { int i; bool in_ram = false, free = true; @@ -453,7 +454,7 @@ static void __init bootmem_init(void) pr_info("Wasting %lu bytes for tracking %lu unused pages\n", (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page), min_low_pfn - ARCH_PFN_OFFSET); - } else if (min_low_pfn < ARCH_PFN_OFFSET) { + } else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) { pr_info("%lu free pages won't be used\n", ARCH_PFN_OFFSET - min_low_pfn); } diff --git 
a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 84b7b592b834..400676ce03f4 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -30,7 +30,6 @@ #include <linux/hardirq.h> #include <linux/gfp.h> #include <linux/kcore.h> -#include <linux/export.h> #include <linux/initrd.h> #include <asm/asm-offsets.h> @@ -46,7 +45,6 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> -#include <asm/maar.h> /* * We have up to 8 empty zeroed pages so we can map one of the right colour diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 33d3251ecd37..2f616ebeb7e0 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask); #define MIN_GAP (128*1024*1024UL) #define MAX_GAP ((TASK_SIZE)/6*5) -static int mmap_is_legacy(void) +static int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; } -static unsigned long mmap_base(unsigned long rnd) +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; if (gap < MIN_GAP) gap = MIN_GAP; @@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; if (current->flags & PF_RANDOMIZE) random_factor = arch_mmap_rnd(); - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/mips/net/bpf_jit_asm.S 
b/arch/mips/net/bpf_jit_asm.S index 88a2075305d1..57154c5883b6 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -11,6 +11,7 @@ */ #include <asm/asm.h> +#include <asm/isa-rev.h> #include <asm/regdef.h> #include "bpf_jit.h" @@ -65,7 +66,7 @@ FEXPORT(sk_load_word_positive) lw $r_A, 0(t1) .set noreorder #ifdef CONFIG_CPU_LITTLE_ENDIAN -# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +# if MIPS_ISA_REV >= 2 wsbh t0, $r_A rotr $r_A, t0, 16 # else @@ -92,7 +93,7 @@ FEXPORT(sk_load_half_positive) PTR_ADDU t1, $r_skb_data, offset lhu $r_A, 0(t1) #ifdef CONFIG_CPU_LITTLE_ENDIAN -# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +# if MIPS_ISA_REV >= 2 wsbh $r_A, $r_A # else sll t0, $r_A, 8 @@ -170,7 +171,7 @@ FEXPORT(sk_load_byte_positive) NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) bpf_slow_path_common(4) #ifdef CONFIG_CPU_LITTLE_ENDIAN -# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +# if MIPS_ISA_REV >= 2 wsbh t0, $r_s0 jr $r_ra rotr $r_A, t0, 16 @@ -196,7 +197,7 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) bpf_slow_path_common(2) #ifdef CONFIG_CPU_LITTLE_ENDIAN -# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) +# if MIPS_ISA_REV >= 2 jr $r_ra wsbh $r_A, $r_s0 # else diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index 407f155f0bb6..f6b77788124a 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -315,6 +315,7 @@ static int mt7620_pci_probe(struct platform_device *pdev) break; case MT762X_SOC_MT7628AN: + case MT762X_SOC_MT7688: if (mt7628_pci_hw_init(pdev)) return -1; break; diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c index f5b367e20dff..31955c1d5555 100644 --- a/arch/mips/txx9/rbtx4927/setup.c +++ b/arch/mips/txx9/rbtx4927/setup.c @@ -319,7 +319,7 @@ static void __init rbtx4927_mtd_init(void) static void __init rbtx4927_gpioled_init(void) { - static struct gpio_led leds[] = { + static 
const struct gpio_led leds[] = { { .name = "gpioled:green:0", .gpio = 0, .active_low = 1, }, { .name = "gpioled:green:1", .gpio = 1, .active_low = 1, }, }; diff --git a/arch/mips/vdso/elf.S b/arch/mips/vdso/elf.S index be37bbb1f061..428a1917afc6 100644 --- a/arch/mips/vdso/elf.S +++ b/arch/mips/vdso/elf.S @@ -10,6 +10,8 @@ #include "vdso.h" +#include <asm/isa-rev.h> + #include <linux/elfnote.h> #include <linux/version.h> @@ -40,11 +42,7 @@ __mips_abiflags: .byte __mips /* isa_level */ /* isa_rev */ -#ifdef __mips_isa_rev - .byte __mips_isa_rev -#else - .byte 0 -#endif + .byte MIPS_ISA_REV /* gpr_size */ #ifdef __mips64 @@ -54,7 +52,7 @@ __mips_abiflags: #endif /* cpr1_size */ -#if (defined(__mips_isa_rev) && __mips_isa_rev >= 6) || defined(__mips64) +#if (MIPS_ISA_REV >= 6) || defined(__mips64) .byte 2 /* AFL_REG_64 */ #else .byte 1 /* AFL_REG_32 */ diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7b9b20a381cb..1240f148ec0f 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma, void flush_kernel_dcache_page(struct page *page); void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_page(struct vm_area_struct *vma, struct page *page); -#define flush_dcache_mmap_lock(mapping) spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages) #else #include <asm-generic/cacheflush.h> diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h index 55e383c173f7..18eb9f69f806 100644 --- a/arch/nios2/include/asm/cacheflush.h +++ b/arch/nios2/include/asm/cacheflush.h @@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, extern void 
flush_dcache_range(unsigned long start, unsigned long end); extern void invalidate_dcache_range(unsigned long start, unsigned long end); -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #endif /* _ASM_NIOS2_CACHEFLUSH_H */ diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index 20e86209ef2e..ab88b6dd4679 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer) return ret; } -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { - ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0); + ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0); ts->tv_nsec = 0; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7e0bb9836b58..fc5a574c3482 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -338,6 +338,7 @@ source "mm/Kconfig" config COMPAT def_bool y depends on 64BIT + select COMPAT_BINFMT_ELF if BINFMT_ELF config SYSVIPC_COMPAT def_bool y diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index bd5ce31936f5..0c83644bfa5c 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *page); -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) +#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define 
flush_icache_page(vma,page) do { \ flush_kernel_dcache_page(page); \ diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index c22db5323244..57b8b2a2fd4e 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -193,6 +193,12 @@ struct compat_shmid64_ds { }; /* + * The type of struct elf_prstatus.pr_reg in compatible core dumps. + */ +#define COMPAT_ELF_NGREG 80 +typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; + +/* * A pointer passed in from user mode. This should not * be used for syscall parameters, just declare them * as pointers because the syscall entry code will have diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 382d75a2ee4f..f019d3ec0c1c 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -6,7 +6,7 @@ * ELF register definitions.. */ -#include <asm/ptrace.h> +#include <linux/types.h> #define EM_PARISC 15 @@ -169,16 +169,12 @@ typedef struct elf64_fdesc { __u64 gp; } Elf64_Fdesc; -#ifdef __KERNEL__ - #ifdef CONFIG_64BIT #define Elf_Fdesc Elf64_Fdesc #else #define Elf_Fdesc Elf32_Fdesc #endif /*CONFIG_64BIT*/ -#endif /*__KERNEL__*/ - /* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ #define PT_HP_TLS (PT_LOOS + 0x0) @@ -213,44 +209,44 @@ typedef struct elf64_fdesc { #define PF_HP_SBP 0x08000000 /* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ + +#define ELF_PLATFORM ("PARISC") + +/* * The following definitions are those for 32-bit ELF binaries on a 32-bit * kernel and for 64-bit binaries on a 64-bit kernel. To run 32-bit binaries - * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these - * macros appropriately and then #includes binfmt_elf.c, which then includes - * this file. 
+ * on a 64-bit kernel, fs/compat_binfmt_elf.c defines ELF_CLASS and then + * #includes binfmt_elf.c, which then includes this file. */ #ifndef ELF_CLASS -/* - * This is used to ensure we don't load something for the wrong architecture. - * - * Note that this header file is used by default in fs/binfmt_elf.c. So - * the following macros are for the default case. However, for the 64 - * bit kernel we also support 32 bit parisc binaries. To do that - * arch/parisc/kernel/binfmt_elf32.c defines its own set of these - * macros, and then it includes fs/binfmt_elf.c to provide an alternate - * elf binary handler for 32 bit binaries (on the 64 bit kernel). - */ #ifdef CONFIG_64BIT -#define ELF_CLASS ELFCLASS64 +#define ELF_CLASS ELFCLASS64 #else #define ELF_CLASS ELFCLASS32 #endif typedef unsigned long elf_greg_t; -/* - * This yields a string that ld.so will use to load implementation - * specific libraries for optimization. This is more specific in - * intent than poking at uname or /proc/cpuinfo. - */ - -#define ELF_PLATFORM ("PARISC\0") - #define SET_PERSONALITY(ex) \ +({ \ set_personality((current->personality & ~PER_MASK) | PER_LINUX); \ current->thread.map_base = DEFAULT_MAP_BASE; \ - current->thread.task_size = DEFAULT_TASK_SIZE \ + current->thread.task_size = DEFAULT_TASK_SIZE; \ + }) + +#endif /* ! ELF_CLASS */ + +#define COMPAT_SET_PERSONALITY(ex) \ +({ \ + set_thread_flag(TIF_32BIT); \ + current->thread.map_base = DEFAULT_MAP_BASE32; \ + current->thread.task_size = DEFAULT_TASK_SIZE32; \ + }) /* * Fill in general registers in a core dump. 
This saves pretty @@ -277,10 +273,12 @@ typedef unsigned long elf_greg_t; #define ELF_CORE_COPY_REGS(dst, pt) \ memset(dst, 0, sizeof(dst)); /* don't leak any "random" bits */ \ - memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \ - memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \ - memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \ - memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \ + { int i; \ + for (i = 0; i < 32; i++) dst[i] = pt->gr[i]; \ + for (i = 0; i < 8; i++) dst[32 + i] = pt->sr[i]; \ + } \ + dst[40] = pt->iaoq[0]; dst[41] = pt->iaoq[1]; \ + dst[42] = pt->iasq[0]; dst[43] = pt->iasq[1]; \ dst[44] = pt->sar; dst[45] = pt->iir; \ dst[46] = pt->isr; dst[47] = pt->ior; \ dst[48] = mfctl(22); dst[49] = mfctl(0); \ @@ -292,7 +290,7 @@ typedef unsigned long elf_greg_t; dst[60] = mfctl(12); dst[61] = mfctl(13); \ dst[62] = mfctl(10); dst[63] = mfctl(15); -#endif /* ! ELF_CLASS */ +#define CORE_DUMP_USE_REGSET #define ELF_NGREG 80 /* We only need 64 at present, but leave space for expansion. */ @@ -310,7 +308,10 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); struct pt_regs; /* forward declaration... */ -#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define elf_check_arch(x) \ + ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS) +#define compat_elf_check_arch(x) \ + ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELFCLASS32) /* * These are used to set parameters in the core dumps. 
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index a056a642bb31..870fbf8c7088 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -26,6 +26,7 @@ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MS_SYNC 1 /* synchronous memory sync */ #define MS_ASYNC 2 /* sync memory asynchronously */ diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h index be40331f757d..4a1062e05aaf 100644 --- a/arch/parisc/include/uapi/asm/siginfo.h +++ b/arch/parisc/include/uapi/asm/siginfo.h @@ -8,11 +8,4 @@ #include <asm-generic/siginfo.h> -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c deleted file mode 100644 index 20dfa081ed0b..000000000000 --- a/arch/parisc/kernel/binfmt_elf32.c +++ /dev/null @@ -1,98 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Support for 32-bit Linux/Parisc ELF binaries on 64 bit kernels - * - * Copyright (C) 2000 John Marvin - * Copyright (C) 2000 Hewlett Packard Co. 
- * - * Heavily inspired from various other efforts to do the same thing - * (ia64,sparc64/mips64) - */ - -/* Make sure include/asm-parisc/elf.h does the right thing */ - -#define ELF_CLASS ELFCLASS32 - -#define ELF_CORE_COPY_REGS(dst, pt) \ - memset(dst, 0, sizeof(dst)); /* don't leak any "random" bits */ \ - { int i; \ - for (i = 0; i < 32; i++) dst[i] = (elf_greg_t) pt->gr[i]; \ - for (i = 0; i < 8; i++) dst[32 + i] = (elf_greg_t) pt->sr[i]; \ - } \ - dst[40] = (elf_greg_t) pt->iaoq[0]; dst[41] = (elf_greg_t) pt->iaoq[1]; \ - dst[42] = (elf_greg_t) pt->iasq[0]; dst[43] = (elf_greg_t) pt->iasq[1]; \ - dst[44] = (elf_greg_t) pt->sar; dst[45] = (elf_greg_t) pt->iir; \ - dst[46] = (elf_greg_t) pt->isr; dst[47] = (elf_greg_t) pt->ior; \ - dst[48] = (elf_greg_t) mfctl(22); dst[49] = (elf_greg_t) mfctl(0); \ - dst[50] = (elf_greg_t) mfctl(24); dst[51] = (elf_greg_t) mfctl(25); \ - dst[52] = (elf_greg_t) mfctl(26); dst[53] = (elf_greg_t) mfctl(27); \ - dst[54] = (elf_greg_t) mfctl(28); dst[55] = (elf_greg_t) mfctl(29); \ - dst[56] = (elf_greg_t) mfctl(30); dst[57] = (elf_greg_t) mfctl(31); \ - dst[58] = (elf_greg_t) mfctl( 8); dst[59] = (elf_greg_t) mfctl( 9); \ - dst[60] = (elf_greg_t) mfctl(12); dst[61] = (elf_greg_t) mfctl(13); \ - dst[62] = (elf_greg_t) mfctl(10); dst[63] = (elf_greg_t) mfctl(15); - - -typedef unsigned int elf_greg_t; - -#include <linux/spinlock.h> -#include <asm/processor.h> -#include <linux/module.h> -#include <linux/elfcore.h> -#include <linux/compat.h> /* struct compat_timeval */ - -#define elf_prstatus elf_prstatus32 -struct elf_prstatus32 -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - unsigned int pr_sigpend; /* Set of pending signals */ - unsigned int pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval 
pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define elf_prpsinfo elf_prpsinfo32 -struct elf_prpsinfo32 -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - unsigned int pr_flag; /* flags */ - u16 pr_uid; - u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - -#define init_elf_binfmt init_elf32_binfmt - -#define ELF_PLATFORM ("PARISC32\0") - -/* - * We should probably use this macro to set a flag somewhere to indicate - * this is a 32 on 64 process. We could use PER_LINUX_32BIT, or we - * could set a processor dependent flag in the thread_struct. - */ - -#undef SET_PERSONALITY -#define SET_PERSONALITY(ex) \ - set_thread_flag(TIF_32BIT); \ - current->thread.map_base = DEFAULT_MAP_BASE32; \ - current->thread.task_size = DEFAULT_TASK_SIZE32 \ - -#undef ns_to_timeval -#define ns_to_timeval ns_to_compat_timeval - -#include "../../../fs/binfmt_elf.c" diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index a99da95fc9fd..bddd2acebdcc 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -254,7 +254,7 @@ parisc_cache_init(void) } } -void disable_sr_hashing(void) +void __init disable_sr_hashing(void) { int srhash_type, retval; unsigned long space_bits; diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 67b0f7532e83..22e6374ece44 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -38,9 +38,10 @@ #include <asm/cache.h> #include <asm/ldcw.h> #include <linux/linkage.h> +#include <linux/init.h> - .text - .align 128 + .section .text.hot + .align 16 ENTRY_CFI(flush_tlb_all_local) .proc 
@@ -328,8 +329,6 @@ fdsync: .procend ENDPROC_CFI(flush_data_cache_local) - .align 16 - /* Macros to serialize TLB purge operations on SMP. */ .macro tlb_lock la,flags,tmp @@ -1216,6 +1215,8 @@ ENTRY_CFI(flush_kernel_icache_range_asm) .procend ENDPROC_CFI(flush_kernel_icache_range_asm) + __INIT + /* align should cover use of rfi in disable_sr_hashing_asm and * srdis_done. */ diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index bbe46571ff96..b931745815e0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -112,14 +112,6 @@ void machine_restart(char *cmd) } -void machine_halt(void) -{ - /* - ** The LED/ChassisCodes are updated by the led_halt() - ** function, called by the reboot notifier chain. - */ -} - void (*chassis_power_off)(void); /* @@ -158,6 +150,11 @@ void machine_power_off(void) void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); +void machine_halt(void) +{ + machine_power_off(); +} + void flush_thread(void) { /* Only needs to handle fpu stuff or perf monitors. diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 8c99ebbe2bac..43b308cfdf53 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr, * Top of mmap area (just below the process stack). */ -static unsigned long mmap_upper_limit(void) +/* + * When called from arch_get_unmapped_area(), rlim_stack will be NULL, + * indicating that "current" should be used instead of a passed-in + * value from the exec bprm as done with arch_pick_mmap_layout(). + */ +static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) { unsigned long stack_base; /* Limit stack size - see setup_arg_pages() in fs/exec.c */ - stack_base = rlimit_max(RLIMIT_STACK); + stack_base = rlim_stack ? 
rlim_stack->rlim_max + : rlimit_max(RLIMIT_STACK); if (stack_base > STACK_SIZE_MAX) stack_base = STACK_SIZE_MAX; @@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, info.flags = 0; info.length = len; info.low_limit = mm->mmap_legacy_base; - info.high_limit = mmap_upper_limit(); + info.high_limit = mmap_upper_limit(NULL); info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0; info.align_offset = shared_align_offset(last_mmap, pgoff); addr = vm_unmapped_area(&info); @@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void) * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { mm->mmap_legacy_base = mmap_legacy_base(); - mm->mmap_base = mmap_upper_limit(); + mm->mmap_base = mmap_upper_limit(rlim_stack); if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index f7e684560186..c3830400ca28 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -174,7 +174,7 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) /* we treat tod_sec as unsigned, so this can work until year 2106 */ rtc_time64_to_tm(tod_data.tod_sec, tm); - return rtc_valid_tm(tm); + return 0; } static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index c919e6c0a687..68e671a11987 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -627,9 +627,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs) on condition */ if(user_mode(regs)){ si.si_signo = SIGFPE; - /* Set to zero, and let the userspace app figure it out from - the insn pointed to by si_addr */ - si.si_code = FPE_FIXME; + /* Let userspace app figure 
it out from the insn pointed + * to by si_addr. + */ + si.si_code = FPE_CONDTRAP; si.si_addr = (void __user *) regs->iaoq[0]; force_sig_info(SIGFPE, &si, current); return; diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index d503f344e476..b24ce40acd47 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -39,12 +39,12 @@ #define MIN_GAP (128*1024*1024) #define MAX_GAP (TASK_SIZE/6*5) -static inline int mmap_is_legacy(void) +static inline int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; @@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void) return (1<<30); } -static inline unsigned long mmap_base(unsigned long rnd) +static inline unsigned long mmap_base(unsigned long rnd, + struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; /* Values close to RLIM_INFINITY can overflow. 
*/ @@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, } static void radix__arch_pick_mmap_layout(struct mm_struct *mm, - unsigned long random_factor) + unsigned long random_factor, + struct rlimit *rlim_stack) { - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = radix__arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown; } } #else /* dummy */ extern void radix__arch_pick_mmap_layout(struct mm_struct *mm, - unsigned long random_factor); + unsigned long random_factor, + struct rlimit *rlim_stack); #endif /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; @@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm) random_factor = arch_mmap_rnd(); if (radix_enabled()) - return radix__arch_pick_mmap_layout(mm, random_factor); + return radix__arch_pick_mmap_layout(mm, random_factor, + rlim_stack); /* * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 9a8a084e4aba..4c615fcb0cf0 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -75,8 +75,7 @@ 
EXPORT_SYMBOL_GPL(mm_iommu_preregistered); /* * Taken from alloc_migrate_target with changes to remove CMA allocations */ -struct page *new_iommu_non_cma_page(struct page *page, unsigned long private, - int **resultp) +struct page *new_iommu_non_cma_page(struct page *page, unsigned long private) { gfp_t gfp_mask = GFP_USER; struct page *new_page; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 516e23de5a3d..48fbb41af5d1 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -824,6 +824,9 @@ static int __init opal_init(void) /* Create i2c platform devices */ opal_pdev_init("ibm,opal-i2c"); + /* Handle non-volatile memory devices */ + opal_pdev_init("pmem-region"); + /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 831bdcf407bb..0a7627cdb34e 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void) #define MIN_GAP (32*1024*1024) #define MAX_GAP (STACK_TOP/6*5) -static inline int mmap_is_legacy(void) +static inline int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) return 1; - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + if (rlim_stack->rlim_cur == RLIM_INFINITY) return 1; return sysctl_legacy_va_layout; } @@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd) return TASK_UNMAPPED_BASE + rnd; } -static inline unsigned long mmap_base(unsigned long rnd) +static inline unsigned long mmap_base(unsigned long rnd, + struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; if (gap < MIN_GAP) gap = MIN_GAP; @@ -184,7 +185,7 @@ check_asce_limit: * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: */ -void arch_pick_mmap_layout(struct 
mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = 0UL; @@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ - if (mmap_is_legacy()) { + if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = mmap_base_legacy(random_factor); mm->get_unmapped_area = arch_get_unmapped_area; } else { - mm->mmap_base = mmap_base(random_factor); + mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 348a17ecdf66..9ef8de63f28b 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void) return rnd << PAGE_SHIFT; } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = mmap_rnd(); unsigned long gap; @@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ - gap = rlimit(RLIMIT_STACK); + gap = rlim_stack->rlim_cur; if (!test_thread_flag(TIF_32BIT) || (current->personality & ADDR_COMPAT_LAYOUT) || gap == RLIM_INFINITY || diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net index e871af24d9cd..c390f3deb0dc 100644 --- a/arch/um/Kconfig.net +++ b/arch/um/Kconfig.net @@ -109,6 +109,17 @@ config UML_NET_DAEMON more than one without conflict. If you don't need UML networking, say N. +config UML_NET_VECTOR + bool "Vector I/O high performance network devices" + depends on UML_NET + help + This User-Mode Linux network driver uses multi-message send + and receive functions. 
The host running the UML guest must have + a linux kernel version above 3.0 and a libc version > 2.13. + This driver provides tap, raw, gre and l2tpv3 network transports + with up to 4 times higher network throughput than the UML network + drivers. + config UML_NET_VDE bool "VDE transport" depends on UML_NET diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index e7582e1d248c..16b3cebddafb 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -9,6 +9,7 @@ slip-objs := slip_kern.o slip_user.o slirp-objs := slirp_kern.o slirp_user.o daemon-objs := daemon_kern.o daemon_user.o +vector-objs := vector_kern.o vector_user.o vector_transports.o umcast-objs := umcast_kern.o umcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o @@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o obj-$(CONFIG_UML_NET_DAEMON) += daemon.o +obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_PCAP) += pcap.o @@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o # pcap_user.o must be added explicitly. -USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o +USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) include arch/um/scripts/Makefile.rules diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index acbe6c67afba..05588f9466c7 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -171,56 +171,19 @@ int enable_chan(struct line *line) return err; } -/* Items are added in IRQ context, when free_irq can't be called, and - * removed in process context, when it can. 
- * This handles interrupt sources which disappear, and which need to - * be permanently disabled. This is discovered in IRQ context, but - * the freeing of the IRQ must be done later. - */ -static DEFINE_SPINLOCK(irqs_to_free_lock); -static LIST_HEAD(irqs_to_free); - -void free_irqs(void) -{ - struct chan *chan; - LIST_HEAD(list); - struct list_head *ele; - unsigned long flags; - - spin_lock_irqsave(&irqs_to_free_lock, flags); - list_splice_init(&irqs_to_free, &list); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); - - list_for_each(ele, &list) { - chan = list_entry(ele, struct chan, free_list); - - if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); - if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); - chan->enabled = 0; - } -} - static void close_one_chan(struct chan *chan, int delay_free_irq) { - unsigned long flags; - if (!chan->opened) return; - if (delay_free_irq) { - spin_lock_irqsave(&irqs_to_free_lock, flags); - list_add(&chan->free_list, &irqs_to_free); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); - } - else { - if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); - if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); - chan->enabled = 0; - } + /* we can safely call free now - it will be marked + * as free and freed once the IRQ stopped processing + */ + if (chan->input && chan->enabled) + um_free_irq(chan->line->driver->read_irq, chan); + if (chan->output && chan->enabled) + um_free_irq(chan->line->driver->write_irq, chan); + chan->enabled = 0; if (chan->ops->close != NULL) (*chan->ops->close)(chan->fd, chan->data); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 366e57f5e8d6..8d80b27502e6 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) if (err) return err; if (output) - 
err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, + err = um_request_irq(driver->write_irq, fd, IRQ_NONE, line_write_interrupt, IRQF_SHARED, driver->write_irq_name, data); return err; diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index b305f8247909..3ef1b48e064a 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t) #endif } -static void setup_etheraddr(struct net_device *dev, char *str) +void uml_net_setup_etheraddr(struct net_device *dev, char *str) { unsigned char *addr = dev->dev_addr; char *end; @@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac, */ snprintf(dev->name, sizeof(dev->name), "eth%d", n); - setup_etheraddr(dev, mac); + uml_net_setup_etheraddr(dev, mac); printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr); diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 37c51a6be690..778a0e52d5a5 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -13,6 +13,7 @@ #include <linux/miscdevice.h> #include <linux/delay.h> #include <linux/uaccess.h> +#include <init.h> #include <irq_kern.h> #include <os.h> @@ -154,7 +155,14 @@ err_out_cleanup_hw: /* * rng_cleanup - shutdown RNG module */ -static void __exit rng_cleanup (void) + +static void cleanup(void) +{ + free_irq_by_fd(random_fd); + os_close_file(random_fd); +} + +static void __exit rng_cleanup(void) { os_close_file(random_fd); misc_deregister (&rng_miscdev); @@ -162,6 +170,7 @@ static void __exit rng_cleanup (void) module_init (rng_init); module_exit (rng_cleanup); +__uml_exitcall(cleanup); MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b55fe9bf5d3e..d4e8c497ae86 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1587,11 +1587,11 @@ int io_thread(void *arg) do { res = 
os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); - if (res > 0) { + if (res >= 0) { written += res; } else { if (res != -EAGAIN) { - printk("io_thread - read failed, fd = %d, " + printk("io_thread - write failed, fd = %d, " "err = %d\n", kernel_fd, -n); } } diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c new file mode 100644 index 000000000000..02168fe25105 --- /dev/null +++ b/arch/um/drivers/vector_kern.c @@ -0,0 +1,1633 @@ +/* + * Copyright (C) 2017 - Cambridge Greys Limited + * Copyright (C) 2011 - 2014 Cisco Systems Inc + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include <linux/version.h> +#include <linux/bootmem.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <init.h> +#include <irq_kern.h> +#include <irq_user.h> +#include <net_kern.h> +#include <os.h> +#include "mconsole_kern.h" +#include "vector_user.h" +#include "vector_kern.h" + +/* + * Adapted from network devices with the following major changes: + * All transports are static - simplifies the code significantly + * Multiple FDs/IRQs per device + * Vector IO optionally used for read/write, falling back to legacy + * based on configuration and/or availability + * Configuration is no longer positional - L2TPv3 and GRE require up to + * 10 parameters, passing this as positional is not fit for purpose. 
+ * Only socket transports are supported + */ + + +#define DRIVER_NAME "uml-vector" +#define DRIVER_VERSION "01" +struct vector_cmd_line_arg { + struct list_head list; + int unit; + char *arguments; +}; + +struct vector_device { + struct list_head list; + struct net_device *dev; + struct platform_device pdev; + int unit; + int opened; +}; + +static LIST_HEAD(vec_cmd_line); + +static DEFINE_SPINLOCK(vector_devices_lock); +static LIST_HEAD(vector_devices); + +static int driver_registered; + +static void vector_eth_configure(int n, struct arglist *def); + +/* Argument accessors to set variables (and/or set default values) + * mtu, buffer sizing, default headroom, etc + */ + +#define DEFAULT_HEADROOM 2 +#define SAFETY_MARGIN 32 +#define DEFAULT_VECTOR_SIZE 64 +#define TX_SMALL_PACKET 128 +#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1) + +static const struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "rx_queue_max" }, + { "rx_queue_running_average" }, + { "tx_queue_max" }, + { "tx_queue_running_average" }, + { "rx_encaps_errors" }, + { "tx_timeout_count" }, + { "tx_restart_queue" }, + { "tx_kicks" }, + { "tx_flow_control_xon" }, + { "tx_flow_control_xoff" }, + { "rx_csum_offload_good" }, + { "rx_csum_offload_errors"}, + { "sg_ok"}, + { "sg_linearized"}, +}; + +#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys) + +static void vector_reset_stats(struct vector_private *vp) +{ + vp->estats.rx_queue_max = 0; + vp->estats.rx_queue_running_average = 0; + vp->estats.tx_queue_max = 0; + vp->estats.tx_queue_running_average = 0; + vp->estats.rx_encaps_errors = 0; + vp->estats.tx_timeout_count = 0; + vp->estats.tx_restart_queue = 0; + vp->estats.tx_kicks = 0; + vp->estats.tx_flow_control_xon = 0; + vp->estats.tx_flow_control_xoff = 0; + vp->estats.sg_ok = 0; + vp->estats.sg_linearized = 0; +} + +static int get_mtu(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "mtu"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) 
== 0) + return result; + } + return ETH_MAX_PACKET; +} + +static int get_depth(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "depth"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) == 0) + return result; + } + return DEFAULT_VECTOR_SIZE; +} + +static int get_headroom(struct arglist *def) +{ + char *mtu = uml_vector_fetch_arg(def, "headroom"); + long result; + + if (mtu != NULL) { + if (kstrtoul(mtu, 10, &result) == 0) + return result; + } + return DEFAULT_HEADROOM; +} + +static int get_req_size(struct arglist *def) +{ + char *gro = uml_vector_fetch_arg(def, "gro"); + long result; + + if (gro != NULL) { + if (kstrtoul(gro, 10, &result) == 0) { + if (result > 0) + return 65536; + } + } + return get_mtu(def) + ETH_HEADER_OTHER + + get_headroom(def) + SAFETY_MARGIN; +} + + +static int get_transport_options(struct arglist *def) +{ + char *transport = uml_vector_fetch_arg(def, "transport"); + char *vector = uml_vector_fetch_arg(def, "vec"); + + int vec_rx = VECTOR_RX; + int vec_tx = VECTOR_TX; + long parsed; + + if (vector != NULL) { + if (kstrtoul(vector, 10, &parsed) == 0) { + if (parsed == 0) { + vec_rx = 0; + vec_tx = 0; + } + } + } + + + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return (vec_rx | VECTOR_BPF); + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return (vec_rx | vec_tx); + return (vec_rx | vec_tx); +} + + +/* A mini-buffer for packet drop read + * All of our supported transports are datagram oriented and we always + * read using recvmsg or recvmmsg. If we pass a buffer which is smaller + * than the packet size it still counts as full packet read and will + * clean the incoming stream to keep sigio/epoll happy + */ + +#define DROP_BUFFER_SIZE 32 + +static char *drop_buffer; + +/* Array backed queues optimized for bulk enqueue/dequeue and + * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios. 
+ * For more details and full design rationale see + * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt + */ + + +/* + * Advance the mmsg queue head by n = advance. Resets the queue to + * maximum enqueue/dequeue-at-once capacity if possible. Called by + * dequeuers. Caller must hold the head_lock! + */ + +static int vector_advancehead(struct vector_queue *qi, int advance) +{ + int queue_depth; + + qi->head = + (qi->head + advance) + % qi->max_depth; + + + spin_lock(&qi->tail_lock); + qi->queue_depth -= advance; + + /* we are at 0, use this to + * reset head and tail so we can use max size vectors + */ + + if (qi->queue_depth == 0) { + qi->head = 0; + qi->tail = 0; + } + queue_depth = qi->queue_depth; + spin_unlock(&qi->tail_lock); + return queue_depth; +} + +/* Advance the queue tail by n = advance. + * This is called by enqueuers which should hold the + * head lock already + */ + +static int vector_advancetail(struct vector_queue *qi, int advance) +{ + int queue_depth; + + qi->tail = + (qi->tail + advance) + % qi->max_depth; + spin_lock(&qi->head_lock); + qi->queue_depth += advance; + queue_depth = qi->queue_depth; + spin_unlock(&qi->head_lock); + return queue_depth; +} + +static int prep_msg(struct vector_private *vp, + struct sk_buff *skb, + struct iovec *iov) +{ + int iov_index = 0; + int nr_frags, frag; + skb_frag_t *skb_frag; + + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags > MAX_IOV_SIZE) { + if (skb_linearize(skb) != 0) + goto drop; + } + if (vp->header_size > 0) { + iov[iov_index].iov_len = vp->header_size; + vp->form_header(iov[iov_index].iov_base, skb, vp); + iov_index++; + } + iov[iov_index].iov_base = skb->data; + if (nr_frags > 0) { + iov[iov_index].iov_len = skb->len - skb->data_len; + vp->estats.sg_ok++; + } else + iov[iov_index].iov_len = skb->len; + iov_index++; + for (frag = 0; frag < nr_frags; frag++) { + skb_frag = &skb_shinfo(skb)->frags[frag]; + iov[iov_index].iov_base = skb_frag_address_safe(skb_frag); + 
iov[iov_index].iov_len = skb_frag_size(skb_frag); + iov_index++; + } + return iov_index; +drop: + return -1; +} +/* + * Generic vector enqueue with support for forming headers using transport + * specific callback. Allows GRE, L2TPv3, RAW and other transports + * to use a common enqueue procedure in vector mode + */ + +static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) +{ + struct vector_private *vp = netdev_priv(qi->dev); + int queue_depth; + int packet_len; + struct mmsghdr *mmsg_vector = qi->mmsg_vector; + int iov_count; + + spin_lock(&qi->tail_lock); + spin_lock(&qi->head_lock); + queue_depth = qi->queue_depth; + spin_unlock(&qi->head_lock); + + if (skb) + packet_len = skb->len; + + if (queue_depth < qi->max_depth) { + + *(qi->skbuff_vector + qi->tail) = skb; + mmsg_vector += qi->tail; + iov_count = prep_msg( + vp, + skb, + mmsg_vector->msg_hdr.msg_iov + ); + if (iov_count < 1) + goto drop; + mmsg_vector->msg_hdr.msg_iovlen = iov_count; + mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; + mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; + queue_depth = vector_advancetail(qi, 1); + } else + goto drop; + spin_unlock(&qi->tail_lock); + return queue_depth; +drop: + qi->dev->stats.tx_dropped++; + if (skb != NULL) { + packet_len = skb->len; + dev_consume_skb_any(skb); + netdev_completed_queue(qi->dev, 1, packet_len); + } + spin_unlock(&qi->tail_lock); + return queue_depth; +} + +static int consume_vector_skbs(struct vector_queue *qi, int count) +{ + struct sk_buff *skb; + int skb_index; + int bytes_compl = 0; + + for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) { + skb = *(qi->skbuff_vector + skb_index); + /* mark as empty to ensure correct destruction if + * needed + */ + bytes_compl += skb->len; + *(qi->skbuff_vector + skb_index) = NULL; + dev_consume_skb_any(skb); + } + qi->dev->stats.tx_bytes += bytes_compl; + qi->dev->stats.tx_packets += count; + netdev_completed_queue(qi->dev, count, bytes_compl); + 
return vector_advancehead(qi, count); +} + +/* + * Generic vector deque via sendmmsg with support for forming headers + * using transport specific callback. Allows GRE, L2TPv3, RAW and + * other transports to use a common dequeue procedure in vector mode + */ + + +static int vector_send(struct vector_queue *qi) +{ + struct vector_private *vp = netdev_priv(qi->dev); + struct mmsghdr *send_from; + int result = 0, send_len, queue_depth = qi->max_depth; + + if (spin_trylock(&qi->head_lock)) { + if (spin_trylock(&qi->tail_lock)) { + /* update queue_depth to current value */ + queue_depth = qi->queue_depth; + spin_unlock(&qi->tail_lock); + while (queue_depth > 0) { + /* Calculate the start of the vector */ + send_len = queue_depth; + send_from = qi->mmsg_vector; + send_from += qi->head; + /* Adjust vector size if wraparound */ + if (send_len + qi->head > qi->max_depth) + send_len = qi->max_depth - qi->head; + /* Try to TX as many packets as possible */ + if (send_len > 0) { + result = uml_vector_sendmmsg( + vp->fds->tx_fd, + send_from, + send_len, + 0 + ); + vp->in_write_poll = + (result != send_len); + } + /* For some of the sendmmsg error scenarios + * we may end being unsure in the TX success + * for all packets. It is safer to declare + * them all TX-ed and blame the network. + */ + if (result < 0) { + if (net_ratelimit()) + netdev_err(vp->dev, "sendmmsg err=%i\n", + result); + result = send_len; + } + if (result > 0) { + queue_depth = + consume_vector_skbs(qi, result); + /* This is equivalent to an TX IRQ. + * Restart the upper layers to feed us + * more packets. 
+ */ + if (result > vp->estats.tx_queue_max) + vp->estats.tx_queue_max = result; + vp->estats.tx_queue_running_average = + (vp->estats.tx_queue_running_average + result) >> 1; + } + netif_trans_update(qi->dev); + netif_wake_queue(qi->dev); + /* if TX is busy, break out of the send loop, + * poll write IRQ will reschedule xmit for us + */ + if (result != send_len) { + vp->estats.tx_restart_queue++; + break; + } + } + } + spin_unlock(&qi->head_lock); + } else { + tasklet_schedule(&vp->tx_poll); + } + return queue_depth; +} + +/* Queue destructor. Deliberately stateless so we can use + * it in queue cleanup if initialization fails. + */ + +static void destroy_queue(struct vector_queue *qi) +{ + int i; + struct iovec *iov; + struct vector_private *vp = netdev_priv(qi->dev); + struct mmsghdr *mmsg_vector; + + if (qi == NULL) + return; + /* deallocate any skbuffs - we rely on any unused to be + * set to NULL. + */ + if (qi->skbuff_vector != NULL) { + for (i = 0; i < qi->max_depth; i++) { + if (*(qi->skbuff_vector + i) != NULL) + dev_kfree_skb_any(*(qi->skbuff_vector + i)); + } + kfree(qi->skbuff_vector); + } + /* deallocate matching IOV structures including header buffs */ + if (qi->mmsg_vector != NULL) { + mmsg_vector = qi->mmsg_vector; + for (i = 0; i < qi->max_depth; i++) { + iov = mmsg_vector->msg_hdr.msg_iov; + if (iov != NULL) { + if ((vp->header_size > 0) && + (iov->iov_base != NULL)) + kfree(iov->iov_base); + kfree(iov); + } + mmsg_vector++; + } + kfree(qi->mmsg_vector); + } + kfree(qi); +} + +/* + * Queue constructor. Create a queue with a given side. 
+ */ +static struct vector_queue *create_queue( + struct vector_private *vp, + int max_size, + int header_size, + int num_extra_frags) +{ + struct vector_queue *result; + int i; + struct iovec *iov; + struct mmsghdr *mmsg_vector; + + result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL); + if (result == NULL) + goto out_fail; + result->max_depth = max_size; + result->dev = vp->dev; + result->mmsg_vector = kmalloc( + (sizeof(struct mmsghdr) * max_size), GFP_KERNEL); + result->skbuff_vector = kmalloc( + (sizeof(void *) * max_size), GFP_KERNEL); + if (result->mmsg_vector == NULL || result->skbuff_vector == NULL) + goto out_fail; + + mmsg_vector = result->mmsg_vector; + for (i = 0; i < max_size; i++) { + /* Clear all pointers - we use non-NULL as marking on + * what to free on destruction + */ + *(result->skbuff_vector + i) = NULL; + mmsg_vector->msg_hdr.msg_iov = NULL; + mmsg_vector++; + } + mmsg_vector = result->mmsg_vector; + result->max_iov_frags = num_extra_frags; + for (i = 0; i < max_size; i++) { + if (vp->header_size > 0) + iov = kmalloc( + sizeof(struct iovec) * (3 + num_extra_frags), + GFP_KERNEL + ); + else + iov = kmalloc( + sizeof(struct iovec) * (2 + num_extra_frags), + GFP_KERNEL + ); + if (iov == NULL) + goto out_fail; + mmsg_vector->msg_hdr.msg_iov = iov; + mmsg_vector->msg_hdr.msg_iovlen = 1; + mmsg_vector->msg_hdr.msg_control = NULL; + mmsg_vector->msg_hdr.msg_controllen = 0; + mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT; + mmsg_vector->msg_hdr.msg_name = NULL; + mmsg_vector->msg_hdr.msg_namelen = 0; + if (vp->header_size > 0) { + iov->iov_base = kmalloc(header_size, GFP_KERNEL); + if (iov->iov_base == NULL) + goto out_fail; + iov->iov_len = header_size; + mmsg_vector->msg_hdr.msg_iovlen = 2; + iov++; + } + iov->iov_base = NULL; + iov->iov_len = 0; + mmsg_vector++; + } + spin_lock_init(&result->head_lock); + spin_lock_init(&result->tail_lock); + result->queue_depth = 0; + result->head = 0; + result->tail = 0; + return result; +out_fail: + 
destroy_queue(result); + return NULL; +} + +/* + * We do not use the RX queue as a proper wraparound queue for now + * This is not necessary because the consumption via netif_rx() + * happens in-line. While we can try using the return code of + * netif_rx() for flow control there are no drivers doing this today. + * For this RX specific use we ignore the tail/head locks and + * just read into a prepared queue filled with skbuffs. + */ + +static struct sk_buff *prep_skb( + struct vector_private *vp, + struct user_msghdr *msg) +{ + int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN; + struct sk_buff *result; + int iov_index = 0, len; + struct iovec *iov = msg->msg_iov; + int err, nr_frags, frag; + skb_frag_t *skb_frag; + + if (vp->req_size <= linear) + len = linear; + else + len = vp->req_size; + result = alloc_skb_with_frags( + linear, + len - vp->max_packet, + 3, + &err, + GFP_ATOMIC + ); + if (vp->header_size > 0) + iov_index++; + if (result == NULL) { + iov[iov_index].iov_base = NULL; + iov[iov_index].iov_len = 0; + goto done; + } + skb_reserve(result, vp->headroom); + result->dev = vp->dev; + skb_put(result, vp->max_packet); + result->data_len = len - vp->max_packet; + result->len += len - vp->max_packet; + skb_reset_mac_header(result); + result->ip_summed = CHECKSUM_NONE; + iov[iov_index].iov_base = result->data; + iov[iov_index].iov_len = vp->max_packet; + iov_index++; + + nr_frags = skb_shinfo(result)->nr_frags; + for (frag = 0; frag < nr_frags; frag++) { + skb_frag = &skb_shinfo(result)->frags[frag]; + iov[iov_index].iov_base = skb_frag_address_safe(skb_frag); + if (iov[iov_index].iov_base != NULL) + iov[iov_index].iov_len = skb_frag_size(skb_frag); + else + iov[iov_index].iov_len = 0; + iov_index++; + } +done: + msg->msg_iovlen = iov_index; + return result; +} + + +/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ + +static void prep_queue_for_rx(struct vector_queue *qi) +{ + struct vector_private *vp = netdev_priv(qi->dev); + 
struct mmsghdr *mmsg_vector = qi->mmsg_vector; + void **skbuff_vector = qi->skbuff_vector; + int i; + + if (qi->queue_depth == 0) + return; + for (i = 0; i < qi->queue_depth; i++) { + /* it is OK if allocation fails - recvmmsg with NULL data in + * iov argument still performs an RX, just drops the packet + * This allows us stop faffing around with a "drop buffer" + */ + + *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr); + skbuff_vector++; + mmsg_vector++; + } + qi->queue_depth = 0; +} + +static struct vector_device *find_device(int n) +{ + struct vector_device *device; + struct list_head *ele; + + spin_lock(&vector_devices_lock); + list_for_each(ele, &vector_devices) { + device = list_entry(ele, struct vector_device, list); + if (device->unit == n) + goto out; + } + device = NULL; + out: + spin_unlock(&vector_devices_lock); + return device; +} + +static int vector_parse(char *str, int *index_out, char **str_out, + char **error_out) +{ + int n, len, err; + char *start = str; + + len = strlen(str); + + while ((*str != ':') && (strlen(str) > 1)) + str++; + if (*str != ':') { + *error_out = "Expected ':' after device number"; + return -EINVAL; + } + *str = '\0'; + + err = kstrtouint(start, 0, &n); + if (err < 0) { + *error_out = "Bad device number"; + return err; + } + + str++; + if (find_device(n)) { + *error_out = "Device already configured"; + return -EINVAL; + } + + *index_out = n; + *str_out = str; + return 0; +} + +static int vector_config(char *str, char **error_out) +{ + int err, n; + char *params; + struct arglist *parsed; + + err = vector_parse(str, &n, ¶ms, error_out); + if (err != 0) + return err; + + /* This string is broken up and the pieces used by the underlying + * driver. We should copy it to make sure things do not go wrong + * later. 
+ */ + + params = kstrdup(params, GFP_KERNEL); + if (params == NULL) { + *error_out = "vector_config failed to strdup string"; + return -ENOMEM; + } + + parsed = uml_parse_vector_ifspec(params); + + if (parsed == NULL) { + *error_out = "vector_config failed to parse parameters"; + return -EINVAL; + } + + vector_eth_configure(n, parsed); + return 0; +} + +static int vector_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int vector_remove(int n, char **error_out) +{ + struct vector_device *vec_d; + struct net_device *dev; + struct vector_private *vp; + + vec_d = find_device(n); + if (vec_d == NULL) + return -ENODEV; + dev = vec_d->dev; + vp = netdev_priv(dev); + if (vp->fds != NULL) + return -EBUSY; + unregister_netdev(dev); + platform_device_unregister(&vec_d->pdev); + return 0; +} + +/* + * There is no shared per-transport initialization code, so + * we will just initialize each interface one by one and + * add them to a list + */ + +static struct platform_driver uml_net_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + + +static void vector_device_release(struct device *dev) +{ + struct vector_device *device = dev_get_drvdata(dev); + struct net_device *netdev = device->dev; + + list_del(&device->list); + kfree(device); + free_netdev(netdev); +} + +/* Bog standard recv using recvmsg - not used normally unless the user + * explicitly specifies not to use recvmmsg vector RX. 
+ */ + +static int vector_legacy_rx(struct vector_private *vp) +{ + int pkt_len; + struct user_msghdr hdr; + struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */ + int iovpos = 0; + struct sk_buff *skb; + int header_check; + + hdr.msg_name = NULL; + hdr.msg_namelen = 0; + hdr.msg_iov = (struct iovec *) &iov; + hdr.msg_control = NULL; + hdr.msg_controllen = 0; + hdr.msg_flags = 0; + + if (vp->header_size > 0) { + iov[0].iov_base = vp->header_rxbuffer; + iov[0].iov_len = vp->header_size; + } + + skb = prep_skb(vp, &hdr); + + if (skb == NULL) { + /* Read a packet into drop_buffer and don't do + * anything with it. + */ + iov[iovpos].iov_base = drop_buffer; + iov[iovpos].iov_len = DROP_BUFFER_SIZE; + hdr.msg_iovlen = 1; + vp->dev->stats.rx_dropped++; + } + + pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0); + + if (skb != NULL) { + if (pkt_len > vp->header_size) { + if (vp->header_size > 0) { + header_check = vp->verify_header( + vp->header_rxbuffer, skb, vp); + if (header_check < 0) { + dev_kfree_skb_irq(skb); + vp->dev->stats.rx_dropped++; + vp->estats.rx_encaps_errors++; + return 0; + } + if (header_check > 0) { + vp->estats.rx_csum_offload_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + pskb_trim(skb, pkt_len - vp->rx_header_size); + skb->protocol = eth_type_trans(skb, skb->dev); + vp->dev->stats.rx_bytes += skb->len; + vp->dev->stats.rx_packets++; + netif_rx(skb); + } else { + dev_kfree_skb_irq(skb); + } + } + return pkt_len; +} + +/* + * Packet at a time TX which falls back to vector TX if the + * underlying transport is busy. 
+ */ + + + +static int writev_tx(struct vector_private *vp, struct sk_buff *skb) +{ + struct iovec iov[3 + MAX_IOV_SIZE]; + int iov_count, pkt_len = 0; + + iov[0].iov_base = vp->header_txbuffer; + iov_count = prep_msg(vp, skb, (struct iovec *) &iov); + + if (iov_count < 1) + goto drop; + pkt_len = uml_vector_writev( + vp->fds->tx_fd, + (struct iovec *) &iov, + iov_count + ); + + netif_trans_update(vp->dev); + netif_wake_queue(vp->dev); + + if (pkt_len > 0) { + vp->dev->stats.tx_bytes += skb->len; + vp->dev->stats.tx_packets++; + } else { + vp->dev->stats.tx_dropped++; + } + consume_skb(skb); + return pkt_len; +drop: + vp->dev->stats.tx_dropped++; + consume_skb(skb); + return pkt_len; +} + +/* + * Receive as many messages as we can in one call using the special + * mmsg vector matched to an skb vector which we prepared earlier. + */ + +static int vector_mmsg_rx(struct vector_private *vp) +{ + int packet_count, i; + struct vector_queue *qi = vp->rx_queue; + struct sk_buff *skb; + struct mmsghdr *mmsg_vector = qi->mmsg_vector; + void **skbuff_vector = qi->skbuff_vector; + int header_check; + + /* Refresh the vector and make sure it is with new skbs and the + * iovs are updated to point to them. + */ + + prep_queue_for_rx(qi); + + /* Fire the Lazy Gun - get as many packets as we can in one go. */ + + packet_count = uml_vector_recvmmsg( + vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); + + if (packet_count <= 0) + return packet_count; + + /* We treat packet processing as enqueue, buffer refresh as dequeue + * The queue_depth tells us how many buffers have been used and how + * many do we need to prep the next time prep_queue_for_rx() is called. 
+ */ + + qi->queue_depth = packet_count; + + for (i = 0; i < packet_count; i++) { + skb = (*skbuff_vector); + if (mmsg_vector->msg_len > vp->header_size) { + if (vp->header_size > 0) { + header_check = vp->verify_header( + mmsg_vector->msg_hdr.msg_iov->iov_base, + skb, + vp + ); + if (header_check < 0) { + /* Overlay header failed to verify - discard. + * We can actually keep this skb and reuse it, + * but that will make the prep logic too + * complex. + */ + dev_kfree_skb_irq(skb); + vp->estats.rx_encaps_errors++; + continue; + } + if (header_check > 0) { + vp->estats.rx_csum_offload_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + } + pskb_trim(skb, + mmsg_vector->msg_len - vp->rx_header_size); + skb->protocol = eth_type_trans(skb, skb->dev); + /* + * We do not need to lock on updating stats here + * The interrupt loop is non-reentrant. + */ + vp->dev->stats.rx_bytes += skb->len; + vp->dev->stats.rx_packets++; + netif_rx(skb); + } else { + /* Overlay header too short to do anything - discard. + * We can actually keep this skb and reuse it, + * but that will make the prep logic too complex. 
+			 */
+			if (skb != NULL)
+				dev_kfree_skb_irq(skb);
+		}
+		(*skbuff_vector) = NULL;
+		/* Move to the next buffer element */
+		mmsg_vector++;
+		skbuff_vector++;
+	}
+	if (packet_count > 0) {
+		if (vp->estats.rx_queue_max < packet_count)
+			vp->estats.rx_queue_max = packet_count;
+		vp->estats.rx_queue_running_average =
+			(vp->estats.rx_queue_running_average + packet_count) >> 1;
+	}
+	return packet_count;
+}
+
+/* Drain the receive side: keep calling the vector (mmsg) or the legacy
+ * receive routine, depending on VECTOR_RX, until it stops returning a
+ * positive count. A non-zero final result is logged (rate limited).
+ */
+static void vector_rx(struct vector_private *vp)
+{
+	int err;
+
+	if ((vp->options & VECTOR_RX) > 0)
+		while ((err = vector_mmsg_rx(vp)) > 0)
+			;
+	else
+		while ((err = vector_legacy_rx(vp)) > 0)
+			;
+	if ((err != 0) && net_ratelimit())
+		netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
+}
+
+/* ndo_start_xmit: without VECTOR_TX the packet is written immediately via
+ * writev_tx(); otherwise it is enqueued on the TX vector queue and the
+ * queue is flushed now, from the coalescing timer, or from the TX tasklet.
+ * Always returns NETDEV_TX_OK.
+ */
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	int queue_depth = 0;
+	unsigned int len;
+	bool more;
+
+	if ((vp->options & VECTOR_TX) == 0) {
+		writev_tx(vp, skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* Sample everything we still need from the skb before it is handed
+	 * to the TX queue, so that it is never dereferenced afterwards.
+	 * NOTE(review): vector_enqueue() is defined outside this section;
+	 * this assumes it takes ownership of the skb - confirm.
+	 */
+	len = skb->len;
+	more = skb->xmit_more;
+
+	/* We do BQL only in the vector path, no point doing it in
+	 * packet at a time mode as there is no device queue
+	 */
+
+	netdev_sent_queue(vp->dev, len);
+	queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+	/* if the device queue is full, stop the upper layers and
+	 * flush it.
+	 */
+
+	if (queue_depth >= vp->tx_queue->max_depth - 1) {
+		vp->estats.tx_kicks++;
+		netif_stop_queue(dev);
+		vector_send(vp->tx_queue);
+		return NETDEV_TX_OK;
+	}
+	if (more) {
+		/* NOTE(review): mod_timer() takes an absolute expiry in
+		 * jiffies while vp->coalesce looks like a relative interval
+		 * (see vector_set_coalesce()); left unchanged - confirm
+		 * whether "jiffies + vp->coalesce" was intended.
+		 */
+		mod_timer(&vp->tl, vp->coalesce);
+		return NETDEV_TX_OK;
+	}
+	if (len < TX_SMALL_PACKET) {
+		vp->estats.tx_kicks++;
+		vector_send(vp->tx_queue);
+	} else
+		tasklet_schedule(&vp->tx_poll);
+	return NETDEV_TX_OK;
+}
+
+/* RX IRQ handler: drains the receive side while the interface is running */
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	vector_rx(vp);
+	return IRQ_HANDLED;
+
+}
+
+/* TX (write-ready) IRQ handler: reschedules the TX tasklet if a previous
+ * send is waiting for the descriptor to become writable.
+ */
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	/* We need to pay attention to it only if we got
+	 * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
+	 * we ignore it. In the future, it may be worth
+	 * it to improve the IRQ controller a bit to make
+	 * tweaking the IRQ mask less costly
+	 */
+
+	if (vp->in_write_poll)
+		tasklet_schedule(&vp->tx_poll);
+	return IRQ_HANDLED;
+
+}
+
+/* Round-robin offset into the vector IRQ range, advanced on every request */
+static int irq_rr;
+
+/* ndo_stop, also used as the error-unwind path of vector_net_open().
+ *
+ * Releases IRQs, file descriptors, buffers and queues obtained by open.
+ * Every freed pointer is reset to NULL so that a later close (for example
+ * after a subsequent failed open) cannot free it a second time, and the
+ * "opened" flag is cleared on every path so that a failed open does not
+ * leave the device permanently reporting -ENXIO.
+ * Always returns 0.
+ */
+static int vector_net_close(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+
+	netif_stop_queue(dev);
+	del_timer(&vp->tl);
+
+	/* Nothing was set up past the "opened" flag - just clear it */
+	if (vp->fds == NULL)
+		goto out_mark_closed;
+
+	/* Disable and free all IRQS */
+	if (vp->rx_irq > 0) {
+		um_free_irq(vp->rx_irq, dev);
+		vp->rx_irq = 0;
+	}
+	if (vp->tx_irq > 0) {
+		um_free_irq(vp->tx_irq, dev);
+		vp->tx_irq = 0;
+	}
+	tasklet_kill(&vp->tx_poll);
+	if (vp->fds->rx_fd > 0) {
+		os_close_file(vp->fds->rx_fd);
+		vp->fds->rx_fd = -1;
+	}
+	if (vp->fds->tx_fd > 0) {
+		os_close_file(vp->fds->tx_fd);
+		vp->fds->tx_fd = -1;
+	}
+	/* kfree(NULL) is a no-op, so no guards are needed */
+	kfree(vp->bpf);
+	vp->bpf = NULL;
+	kfree(vp->fds->remote_addr);
+	kfree(vp->transport_data);
+	vp->transport_data = NULL;
+	kfree(vp->header_rxbuffer);
+	vp->header_rxbuffer = NULL;
+	kfree(vp->header_txbuffer);
+	vp->header_txbuffer = NULL;
+	if (vp->rx_queue != NULL) {
+		destroy_queue(vp->rx_queue);
+		vp->rx_queue = NULL;
+	}
+	if (vp->tx_queue != NULL) {
+		destroy_queue(vp->tx_queue);
+		vp->tx_queue = NULL;
+	}
+	kfree(vp->fds);
+	vp->fds = NULL;
+out_mark_closed:
+	spin_lock_irqsave(&vp->lock, flags);
+	vp->opened = false;
+	spin_unlock_irqrestore(&vp->lock, flags);
+	return 0;
+}
+
+/* TX tasklet */
+
+static void vector_tx_poll(unsigned long data)
+{
+	struct vector_private *vp = (struct vector_private *)data;
+
+	vp->estats.tx_kicks++;
+	vector_send(vp->tx_queue);
+}
+
+/* Work item scheduled from vector_net_tx_timeout(): resets the BQL state
+ * and restarts the TX queue.
+ */
+static void vector_reset_tx(struct work_struct *work)
+{
+	struct vector_private *vp =
+		container_of(work, struct vector_private, reset_tx);
+	netdev_reset_queue(vp->dev);
+	netif_start_queue(vp->dev);
+	netif_wake_queue(vp->dev);
+}
+
+/* ndo_open: opens the transport descriptors, builds the per-transport
+ * header state, allocates queues (vector mode) or header buffers (legacy
+ * mode), requests the RX and, for vector TX, the TX IRQ, and starts the
+ * queue.
+ *
+ * Returns 0 on success, -ENXIO if the device is already open, or a
+ * negative error after undoing partial setup via vector_net_close().
+ */
+static int vector_net_open(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+	int err = -EINVAL;
+	struct vector_device *vdevice;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	if (vp->opened) {
+		spin_unlock_irqrestore(&vp->lock, flags);
+		return -ENXIO;
+	}
+	vp->opened = true;
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+	if (vp->fds == NULL)
+		goto out_close;
+
+	if (build_transport_data(vp) < 0)
+		goto out_close;
+
+	if ((vp->options & VECTOR_RX) > 0) {
+		vp->rx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->rx_header_size,
+			MAX_IOV_SIZE
+		);
+		/* vector_net_close() treats a NULL queue as "not created";
+		 * do the same here rather than dereferencing it.
+		 */
+		if (vp->rx_queue == NULL) {
+			err = -ENOMEM;
+			goto out_close;
+		}
+		vp->rx_queue->queue_depth = get_depth(vp->parsed);
+	} else {
+		vp->header_rxbuffer = kmalloc(
+			vp->rx_header_size,
+			GFP_KERNEL
+		);
+		if (vp->header_rxbuffer == NULL)
+			goto out_close;
+	}
+	if ((vp->options & VECTOR_TX) > 0) {
+		vp->tx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->header_size,
+			MAX_IOV_SIZE
+		);
+		if (vp->tx_queue == NULL) {
+			err = -ENOMEM;
+			goto out_close;
+		}
+	} else {
+		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+		if (vp->header_txbuffer == NULL)
+			goto out_close;
+	}
+
+	/* READ IRQ */
+	err = um_request_irq(
+		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+			IRQ_READ, vector_rx_interrupt,
+			IRQF_SHARED, dev->name, dev);
+	if (err != 0) {
+		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+		err = -ENETUNREACH;
+		goto out_close;
+	}
+	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+	dev->irq = irq_rr + VECTOR_BASE_IRQ;
+	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+	/* WRITE IRQ - we need it only if we have vector TX */
+	if ((vp->options & VECTOR_TX) > 0) {
+		err = um_request_irq(
+			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+				IRQ_WRITE, vector_tx_interrupt,
+				IRQF_SHARED, dev->name, dev);
+		if (err != 0) {
+			netdev_err(dev,
+				"vector_open: failed to get tx irq(%d)\n", err);
+			err = -ENETUNREACH;
+			goto out_close;
+		}
+		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+	}
+
+	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+			vp->options = vp->options | VECTOR_BPF;
+	}
+
+	if ((vp->options & VECTOR_BPF) != 0)
+		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+
+	netif_start_queue(dev);
+
+	/* clear buffer - it can happen that the host side of the interface
+	 * is full when we get here. In this case, new data is never queued,
+	 * SIGIOs never arrive, and the net never works.
+	 */
+
+	vector_rx(vp);
+
+	vector_reset_stats(vp);
+	vdevice = find_device(vp->unit);
+	vdevice->opened = 1;
+
+	if ((vp->options & VECTOR_TX) != 0)
+		add_timer(&vp->tl);
+	return 0;
+out_close:
+	vector_net_close(dev);
+	return err;
+}
+
+
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+	/* TODO: - we can do some BPF games here */
+	return;
+}
+
+/* ndo_tx_timeout: counts the event and defers the queue reset to
+ * vector_reset_tx() via the reset_tx work item.
+ */
+static void vector_net_tx_timeout(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	vp->estats.tx_timeout_count++;
+	netif_trans_update(dev);
+	schedule_work(&vp->reset_tx);
+}
+
+/* ndo_fix_features: masks out NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM */
+static netdev_features_t vector_fix_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	return features;
+}
+
+/* ndo_set_features: picks the size requested for new receive buffers
+ * depending on whether GRO is enabled. Always returns 0.
+ */
+static int vector_set_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+	 * no way to negotiate it on raw sockets, so we can change
+	 * only our side.
+	 */
+	if (features & NETIF_F_GRO)
+		/* All new frame buffers will be GRO-sized */
+		vp->req_size = 65536;
+	else
+		/* All new frame buffers will be normal sized */
+		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vector_net_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);
+	vector_rx_interrupt(dev->irq, dev);
+	enable_irq(dev->irq);
+}
+#endif
+
+static void vector_net_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+/* ethtool get_ringparam: reports the depth of each vector queue.
+ * A queue only exists while the device is open with the matching
+ * VECTOR_RX / VECTOR_TX option, so an absent queue is reported as 0
+ * instead of being dereferenced.
+ */
+static void vector_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+	int rx_depth = (vp->rx_queue != NULL) ? vp->rx_queue->max_depth : 0;
+	int tx_depth = (vp->tx_queue != NULL) ? vp->tx_queue->max_depth : 0;
+
+	ring->rx_max_pending = rx_depth;
+	ring->tx_max_pending = tx_depth;
+	ring->rx_pending = rx_depth;
+	ring->tx_pending = tx_depth;
+}
+
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		*buf = '\0';
+		break;
+	case ETH_SS_STATS:
+		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return 0;
+	case ETH_SS_STATS:
+		return VECTOR_NUM_STATS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* ethtool get_ethtool_stats: copies struct vector_estats out verbatim */
+static void vector_get_ethtool_stats(struct net_device *dev,
+	struct ethtool_stats *estats,
+	u64 *tmp_stats)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+}
+
+/* ethtool get_coalesce: converts vp->coalesce (jiffies) to microseconds */
+static int vector_get_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+	return 0;
+}
+
+/* ethtool set_coalesce: converts microseconds to jiffies, minimum 1 */
+static int vector_set_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
+	if (vp->coalesce == 0)
+		vp->coalesce = 1;
+	return 0;
+}
+
+static const struct ethtool_ops vector_net_ethtool_ops = {
+	.get_drvinfo	= vector_net_get_drvinfo,
+	.get_link	= ethtool_op_get_link,
+	.get_ts_info	= ethtool_op_get_ts_info,
+	.get_ringparam	= vector_get_ringparam,
+	.get_strings	= vector_get_strings,
+	.get_sset_count	= vector_get_sset_count,
+	.get_ethtool_stats = vector_get_ethtool_stats,
+	.get_coalesce	= vector_get_coalesce,
+	.set_coalesce	= vector_set_coalesce,
+};
+
+
+static const struct net_device_ops vector_netdev_ops = {
+	.ndo_open		= vector_net_open,
+	.ndo_stop		= vector_net_close,
+	.ndo_start_xmit		= vector_net_start_xmit,
+	.ndo_set_rx_mode	= vector_net_set_multicast_list,
+	.ndo_tx_timeout		= vector_net_tx_timeout,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_fix_features	= vector_fix_features,
+	.ndo_set_features	= vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= vector_net_poll_controller,
+#endif
+};
+
+
+/* TX coalescing timer callback: kicks the TX queue */
+static void vector_timer_expire(struct timer_list *t)
+{
+	struct vector_private *vp = from_timer(vp, t, tl);
+
+	vp->estats.tx_kicks++;
+	vector_send(vp->tx_queue);
+}
+
+/* Create and register vector device number n from its parsed argument
+ * list: allocates the vector_device and net_device, registers the
+ * platform device, initialises the private state and registers the
+ * netdevice. Failures are reported by printk only (void return).
+ */
+static void vector_eth_configure(
+		int n,
+		struct arglist *def
+	)
+{
+	struct vector_device *device;
+	struct net_device *dev;
+	struct vector_private *vp;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (device == NULL) {
+		printk(KERN_ERR "eth_configure failed to allocate struct "
+				 "vector_device\n");
+		return;
+	}
+	dev = alloc_etherdev(sizeof(struct vector_private));
+	if (dev == NULL) {
+		printk(KERN_ERR "eth_configure: failed to allocate struct "
+				 "net_device for vec%d\n", n);
+		goto out_free_device;
+	}
+
+	dev->mtu = get_mtu(def);
+
+	INIT_LIST_HEAD(&device->list);
+	device->unit = n;
+
+	/* If this name ends up conflicting with an existing registered
+	 * netdevice, that is OK, register_netdev{,ice}() will notice this
+	 * and fail.
+	 */
+	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+	uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+	vp = netdev_priv(dev);
+
+	/* sysfs register */
+	if (!driver_registered) {
+		platform_driver_register(&uml_net_driver);
+		driver_registered = 1;
+	}
+	device->pdev.id = n;
+	device->pdev.name = DRIVER_NAME;
+	device->pdev.dev.release = vector_device_release;
+	dev_set_drvdata(&device->pdev.dev, device);
+	if (platform_device_register(&device->pdev))
+		goto out_free_netdev;
+	SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+	device->dev = dev;
+
+	*vp = ((struct vector_private)
+		{
+		.list			= LIST_HEAD_INIT(vp->list),
+		.dev			= dev,
+		.unit			= n,
+		.options		= get_transport_options(def),
+		.rx_irq			= 0,
+		.tx_irq			= 0,
+		.parsed			= def,
+		.max_packet		= get_mtu(def) + ETH_HEADER_OTHER,
+		/* TODO - we need to calculate headroom so that ip header
+		 * is 16 byte aligned all the time
+		 */
+		.headroom		= get_headroom(def),
+		.form_header		= NULL,
+		.verify_header		= NULL,
+		.header_rxbuffer	= NULL,
+		.header_txbuffer	= NULL,
+		.header_size		= 0,
+		.rx_header_size		= 0,
+		.rexmit_scheduled	= false,
+		.opened			= false,
+		.transport_data		= NULL,
+		.in_write_poll		= false,
+		.coalesce		= 2,
+		.req_size		= get_req_size(def)
+		});
+
+	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+	tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+	INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+	timer_setup(&vp->tl, vector_timer_expire, 0);
+	spin_lock_init(&vp->lock);
+
+	/* FIXME */
+	dev->netdev_ops = &vector_netdev_ops;
+	dev->ethtool_ops = &vector_net_ethtool_ops;
+	dev->watchdog_timeo = (HZ >> 1);
+	/* primary IRQ - fixme */
+	dev->irq = 0; /* we will adjust this once opened */
+
+	rtnl_lock();
+	err = register_netdevice(dev);
+	rtnl_unlock();
+	if (err)
+		goto out_undo_user_init;
+
+	spin_lock(&vector_devices_lock);
+	list_add(&device->list, &vector_devices);
+	spin_unlock(&vector_devices_lock);
+
+	return;
+
+out_undo_user_init:
+	/* NOTE(review): this path releases nothing - the platform device
+	 * stays registered and dev/device stay allocated. The right unwind
+	 * depends on what vector_device_release() frees, which is defined
+	 * outside this section, so it is left unchanged here.
+	 */
+	return;
+out_free_netdev:
+	free_netdev(dev);
+out_free_device:
+	kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+static int __init vector_init(void)
+{
+	struct list_head *ele;
+	struct vector_cmd_line_arg *def;
+	struct arglist *parsed;
+
+	list_for_each(ele, &vec_cmd_line) {
+		def = list_entry(ele, struct vector_cmd_line_arg, list);
+		parsed = uml_parse_vector_ifspec(def->arguments);
+		if (parsed != NULL)
+			vector_eth_configure(def->unit, parsed);
+	}
+	return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+static int __init vector_setup(char *str)
+{
+	char *error;
+	int n, err;
+	struct vector_cmd_line_arg *new;
+
+	err = vector_parse(str, &n, &str, &error);
+	if (err) {
+		printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
+				 str, error);
+		return 1;
+	}
+	new = alloc_bootmem(sizeof(*new));
+	INIT_LIST_HEAD(&new->list);
+	new->unit = n;
+	new->arguments = str;
+	list_add_tail(&new->list, &vec_cmd_line);
+	return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+" Configure a vector io network device.\n\n"
+);
+
+late_initcall(vector_init);
+
+static struct mc_device vector_mc = {
+	.list		= LIST_HEAD_INIT(vector_mc.list),
+	.name		= "vec",
+	.config		= vector_config,
+	.get_config	= NULL,
+	.id		= vector_id,
+	.remove		= vector_remove,
+};
+
+#ifdef CONFIG_INET
+/* inetaddr notifier callback: currently ignores every event */
+static int vector_inetaddr_event(
+	struct notifier_block *this,
+	unsigned long event,
+	void *ptr)
+{
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+	.notifier_call		= vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+	register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+static inline void inet_register(void)
+{
+}
+#endif
+
+static int vector_net_init(void)	/* registers vector_mc with mconsole, then calls inet_register(); always returns 0 */
+{
+	mconsole_register_dev(&vector_mc);
+	inet_register();
+	return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..0b0a767b9076
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ *
+ * Kernel-side declarations of the UML vector network driver: queue,
+ * statistics and per-device private structures.
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a recvmmsg/sendmmsg context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG	0
+#define QUEUE_SENDMMSG	1
+
+/* Bit flags kept in vector_private.options */
+#define VECTOR_RX		1		/* receive through the vector (mmsg) queue */
+#define VECTOR_TX		(1 << 1)	/* transmit through the vector (mmsg) queue */
+#define VECTOR_BPF		(1 << 2)	/* attach the default BPF filter to the rx descriptor on open */
+#define VECTOR_QDISC_BYPASS	(1 << 3)	/* try to enable qdisc bypass on the rx descriptor on open */
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+struct vector_queue {
+	struct mmsghdr *mmsg_vector;	/* presumably one mmsghdr per queue slot - TODO confirm in create_queue() */
+	void **skbuff_vector;		/* presumably the skb pointer matching each mmsg_vector slot - TODO confirm */
+	 /* backlink to device which owns us */
+	struct net_device *dev;
+	spinlock_t head_lock;
+	spinlock_t tail_lock;
+	int queue_depth, head, tail, max_depth, max_iov_frags;	/* max_depth is what ethtool reports as ring size */
+	short options;
+};
+
+struct vector_estats {	/* exported as-is through ethtool; every member must stay a uint64_t */
+	uint64_t rx_queue_max;			/* largest packet count returned by one vector receive */
+	uint64_t rx_queue_running_average;	/* (previous average + packet count) >> 1 */
+	uint64_t tx_queue_max;
+	uint64_t tx_queue_running_average;
+	uint64_t rx_encaps_errors;
+	uint64_t tx_timeout_count;		/* incremented by the ndo_tx_timeout handler */
+	uint64_t tx_restart_queue;
+	uint64_t tx_kicks;			/* incremented each time the TX queue is flushed */
+	uint64_t tx_flow_control_xon;
+	uint64_t tx_flow_control_xoff;
+	uint64_t rx_csum_offload_good;
+	uint64_t rx_csum_offload_errors;
+	uint64_t sg_ok;
+	uint64_t sg_linearized;
+};
+
+/* Named results for the verify_header() callbacks - presumably what the
+ * receive path compares against; TODO confirm at the call sites.
+ */
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+struct vector_private {	/* per-device private state, stored in netdev_priv() of the vecN net_device */
+	struct list_head list;
+	spinlock_t lock;		/* taken around updates of "opened" in open/close */
+	struct net_device *dev;		/* back pointer to the owning net_device */
+
+	int unit;			/* device number N of "vecN" */
+
+	/* TX coalescing timer: re-armed by the xmit path while more packets
+	 * are expected; its expiry flushes the TX queue
+	 */
+
+	struct timer_list tl;
+
+	/* Work scheduled by the TX timeout handler: resets and restarts the TX queue */
+	struct work_struct reset_tx;
+	struct vector_fds *fds;		/* rx/tx descriptors from uml_vector_user_open(); NULL while closed */
+
+	struct vector_queue *rx_queue;	/* only created on open when VECTOR_RX is set */
+	struct vector_queue *tx_queue;	/* only created on open when VECTOR_TX is set */
+
+	int rx_irq;			/* 0 means no IRQ requested */
+	int tx_irq;			/* 0 means no IRQ requested */
+
+	struct arglist *parsed;		/* parsed command-line arguments for this device */
+
+	void *transport_data; /* transport specific params if needed */
+
+	int max_packet;			/* MTU + ETH_HEADER_OTHER */
+	int req_size; /* different from max packet - used for TSO */
+	int headroom;
+
+	int options;			/* VECTOR_* flag bits */
+
+	/* Encapsulation header sizes and TX coalescing interval. The remote
+	 * address, if any, is kept in fds->remote_addr, not here.
+	 */
+
+	int header_size;		/* bytes of transport header on transmit */
+	int rx_header_size;		/* bytes of transport header on receive */
+	int coalesce;			/* TX coalescing interval in jiffies */
+
+	void *header_rxbuffer;		/* allocated on open only when VECTOR_RX is not set */
+	void *header_txbuffer;		/* allocated on open only when VECTOR_TX is not set */
+
+	int (*form_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);	/* writes the transport header for skb; NULL if none */
+	int (*verify_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);	/* checks a received transport header; NULL if none */
+
+	spinlock_t stats_lock;
+
+	struct tasklet_struct tx_poll;	/* TX tasklet: flushes the TX queue */
+	bool rexmit_scheduled;
+	bool opened;			/* true between a successful open and close */
+	bool in_write_poll;		/* when set, the TX IRQ handler reschedules tx_poll */
+
+	/* ethtool stats */
+
+	struct vector_estats estats;
+	void *bpf;			/* filter returned by uml_vector_default_bpf(); freed on close */
+
+	char user[0];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Licensed under the GPL.
+ */ + +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <asm/byteorder.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/virtio_net.h> +#include <linux/virtio_net.h> +#include <linux/virtio_byteorder.h> +#include <linux/netdev_features.h> +#include "vector_user.h" +#include "vector_kern.h" + +#define GOOD_LINEAR 512 +#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface" + +struct gre_minimal_header { + uint16_t header; + uint16_t arptype; +}; + + +struct uml_gre_data { + uint32_t rx_key; + uint32_t tx_key; + uint32_t sequence; + + bool ipv6; + bool has_sequence; + bool pin_sequence; + bool checksum; + bool key; + struct gre_minimal_header expected_header; + + uint32_t checksum_offset; + uint32_t key_offset; + uint32_t sequence_offset; + +}; + +struct uml_l2tpv3_data { + uint64_t rx_cookie; + uint64_t tx_cookie; + uint64_t rx_session; + uint64_t tx_session; + uint32_t counter; + + bool udp; + bool ipv6; + bool has_counter; + bool pin_counter; + bool cookie; + bool cookie_is_64; + + uint32_t cookie_offset; + uint32_t session_offset; + uint32_t counter_offset; +}; + +static int l2tpv3_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct uml_l2tpv3_data *td = vp->transport_data; + uint32_t *counter; + + if (td->udp) + *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET); + (*(uint32_t *) (header + td->session_offset)) = td->tx_session; + + if (td->cookie) { + if (td->cookie_is_64) + (*(uint64_t *)(header + td->cookie_offset)) = + td->tx_cookie; + else + (*(uint32_t *)(header + td->cookie_offset)) = + td->tx_cookie; + } + if (td->has_counter) { + counter = (uint32_t *)(header + td->counter_offset); + if (td->pin_counter) { + *counter = 0; + } else { + td->counter++; + *counter = cpu_to_be32(td->counter); + } + } + return 0; +} + +static int gre_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct 
uml_gre_data *td = vp->transport_data; + uint32_t *sequence; + *((uint32_t *) header) = *((uint32_t *) &td->expected_header); + if (td->key) + (*(uint32_t *) (header + td->key_offset)) = td->tx_key; + if (td->has_sequence) { + sequence = (uint32_t *)(header + td->sequence_offset); + if (td->pin_sequence) + *sequence = 0; + else + *sequence = cpu_to_be32(++td->sequence); + } + return 0; +} + +static int raw_form_header(uint8_t *header, + struct sk_buff *skb, struct vector_private *vp) +{ + struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header; + + virtio_net_hdr_from_skb( + skb, + vheader, + virtio_legacy_is_little_endian(), + false + ); + + return 0; +} + +static int l2tpv3_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + struct uml_l2tpv3_data *td = vp->transport_data; + uint32_t *session; + uint64_t cookie; + + if ((!td->udp) && (!td->ipv6)) + header += sizeof(struct iphdr) /* fix for ipv4 raw */; + + /* we do not do a strict check for "data" packets as per + * the RFC spec because the pure IP spec does not have + * that anyway. 
+ */ + + if (td->cookie) { + if (td->cookie_is_64) + cookie = *(uint64_t *)(header + td->cookie_offset); + else + cookie = *(uint32_t *)(header + td->cookie_offset); + if (cookie != td->rx_cookie) { + if (net_ratelimit()) + netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id"); + return -1; + } + } + session = (uint32_t *) (header + td->session_offset); + if (*session != td->rx_session) { + if (net_ratelimit()) + netdev_err(vp->dev, "uml_l2tpv3: session mismatch"); + return -1; + } + return 0; +} + +static int gre_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + + uint32_t key; + struct uml_gre_data *td = vp->transport_data; + + if (!td->ipv6) + header += sizeof(struct iphdr) /* fix for ipv4 raw */; + + if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) { + if (net_ratelimit()) + netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x", + *((uint32_t *) &td->expected_header), + *((uint32_t *) header) + ); + return -1; + } + + if (td->key) { + key = (*(uint32_t *)(header + td->key_offset)); + if (key != td->rx_key) { + if (net_ratelimit()) + netdev_err(vp->dev, "unknown key id %0x, expecting %0x", + key, td->rx_key); + return -1; + } + } + return 0; +} + +static int raw_verify_header( + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) +{ + struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header; + + if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) && + (vp->req_size != 65536)) { + if (net_ratelimit()) + netdev_err( + vp->dev, + GSO_ERROR + ); + } + if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0) + return 1; + + virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian()); + return 0; +} + +static bool get_uint_param( + struct arglist *def, char *param, unsigned int *result) +{ + char *arg = uml_vector_fetch_arg(def, param); + + if (arg != NULL) { + if (kstrtoint(arg, 0, result) == 0) + return true; + } + return false; +} + +static bool get_ulong_param( + 
struct arglist *def, char *param, unsigned long *result) +{ + char *arg = uml_vector_fetch_arg(def, param); + + if (arg != NULL) { + if (kstrtoul(arg, 0, result) == 0) + return true; + return true; + } + return false; +} + +static int build_gre_transport_data(struct vector_private *vp) +{ + struct uml_gre_data *td; + int temp_int; + int temp_rx; + int temp_tx; + + vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL); + if (vp->transport_data == NULL) + return -ENOMEM; + td = vp->transport_data; + td->sequence = 0; + + td->expected_header.arptype = GRE_IRB; + td->expected_header.header = 0; + + vp->form_header = &gre_form_header; + vp->verify_header = &gre_verify_header; + vp->header_size = 4; + td->key_offset = 4; + td->sequence_offset = 4; + td->checksum_offset = 4; + + td->ipv6 = false; + if (get_uint_param(vp->parsed, "v6", &temp_int)) { + if (temp_int > 0) + td->ipv6 = true; + } + td->key = false; + if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) { + if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) { + td->key = true; + td->expected_header.header |= GRE_MODE_KEY; + td->rx_key = cpu_to_be32(temp_rx); + td->tx_key = cpu_to_be32(temp_tx); + vp->header_size += 4; + td->sequence_offset += 4; + } else { + return -EINVAL; + } + } + + td->sequence = false; + if (get_uint_param(vp->parsed, "sequence", &temp_int)) { + if (temp_int > 0) { + vp->header_size += 4; + td->has_sequence = true; + td->expected_header.header |= GRE_MODE_SEQUENCE; + if (get_uint_param( + vp->parsed, "pin_sequence", &temp_int)) { + if (temp_int > 0) + td->pin_sequence = true; + } + } + } + vp->rx_header_size = vp->header_size; + if (!td->ipv6) + vp->rx_header_size += sizeof(struct iphdr); + return 0; +} + +static int build_l2tpv3_transport_data(struct vector_private *vp) +{ + + struct uml_l2tpv3_data *td; + int temp_int, temp_rxs, temp_txs; + unsigned long temp_rx; + unsigned long temp_tx; + + vp->transport_data = kmalloc( + sizeof(struct uml_l2tpv3_data), GFP_KERNEL); + + 
if (vp->transport_data == NULL) + return -ENOMEM; + + td = vp->transport_data; + + vp->form_header = &l2tpv3_form_header; + vp->verify_header = &l2tpv3_verify_header; + td->counter = 0; + + vp->header_size = 4; + td->session_offset = 0; + td->cookie_offset = 4; + td->counter_offset = 4; + + + td->ipv6 = false; + if (get_uint_param(vp->parsed, "v6", &temp_int)) { + if (temp_int > 0) + td->ipv6 = true; + } + + if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) { + if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) { + td->tx_session = cpu_to_be32(temp_txs); + td->rx_session = cpu_to_be32(temp_rxs); + } else { + return -EINVAL; + } + } else { + return -EINVAL; + } + + td->cookie_is_64 = false; + if (get_uint_param(vp->parsed, "cookie64", &temp_int)) { + if (temp_int > 0) + td->cookie_is_64 = true; + } + td->cookie = false; + if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) { + if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) { + td->cookie = true; + if (td->cookie_is_64) { + td->rx_cookie = cpu_to_be64(temp_rx); + td->tx_cookie = cpu_to_be64(temp_tx); + vp->header_size += 8; + td->counter_offset += 8; + } else { + td->rx_cookie = cpu_to_be32(temp_rx); + td->tx_cookie = cpu_to_be32(temp_tx); + vp->header_size += 4; + td->counter_offset += 4; + } + } else { + return -EINVAL; + } + } + + td->has_counter = false; + if (get_uint_param(vp->parsed, "counter", &temp_int)) { + if (temp_int > 0) { + td->has_counter = true; + vp->header_size += 4; + if (get_uint_param( + vp->parsed, "pin_counter", &temp_int)) { + if (temp_int > 0) + td->pin_counter = true; + } + } + } + + if (get_uint_param(vp->parsed, "udp", &temp_int)) { + if (temp_int > 0) { + td->udp = true; + vp->header_size += 4; + td->counter_offset += 4; + td->session_offset += 4; + td->cookie_offset += 4; + } + } + + vp->rx_header_size = vp->header_size; + if ((!td->ipv6) && (!td->udp)) + vp->rx_header_size += sizeof(struct iphdr); + + return 0; +} + +static int build_raw_transport_data(struct 
vector_private *vp) +{ + if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) { + if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd)) + return -1; + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "raw: using vnet headers for tso and tx/rx checksum" + ); + } + return 0; +} + +static int build_tap_transport_data(struct vector_private *vp) +{ + if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) { + vp->form_header = &raw_form_header; + vp->verify_header = &raw_verify_header; + vp->header_size = sizeof(struct virtio_net_hdr); + vp->rx_header_size = sizeof(struct virtio_net_hdr); + vp->dev->hw_features |= + (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + vp->dev->features |= + (NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO); + netdev_info( + vp->dev, + "tap/raw: using vnet headers for tso and tx/rx checksum" + ); + } else { + return 0; /* do not try to enable tap too if raw failed */ + } + if (uml_tap_enable_vnet_headers(vp->fds->tx_fd)) + return 0; + return -1; +} + +int build_transport_data(struct vector_private *vp) +{ + char *transport = uml_vector_fetch_arg(vp->parsed, "transport"); + + if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0) + return build_gre_transport_data(vp); + if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0) + return build_l2tpv3_transport_data(vp); + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return build_raw_transport_data(vp); + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return build_tap_transport_data(vp); + return 0; +} + diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c new file mode 100644 index 000000000000..4d6a78e31089 --- /dev/null +++ 
b/arch/um/drivers/vector_user.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0
+#define ID_L2TPV3 1
+#define ID_MAX 1
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+
+/* This is very ugly and brute force lookup, but it is done
+ * only once at initialization so not worth doing hashes or
+ * anything more intelligent
+ *
+ * Returns the value stored for @token, or NULL if it is not present.
+ */
+
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+	int i;
+
+	for (i = 0; i < ifspec->numargs; i++) {
+		if (strcmp(ifspec->tokens[i], token) == 0)
+			return ifspec->values[i];
+	}
+	return NULL;
+
+}
+
+/* Split "token=value,token=value" in place into a newly allocated
+ * arglist. @arg is modified ('=' and ',' become NUL) and the arglist
+ * points into it, so @arg must outlive the result.
+ *
+ * Returns NULL when @arg is NULL, on allocation failure, on a second '='
+ * within one value, or when there are more arguments than the arglist
+ * arrays can hold.
+ */
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+	struct arglist *result;
+	int pos, len;
+	int max_args;
+	bool parsing_token = true, next_starts = true;
+
+	if (arg == NULL)
+		return NULL;
+	result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	/* tokens[] and values[] are filled in lock step; never write past
+	 * the smaller of the two embedded arrays
+	 */
+	max_args = sizeof(result->tokens) / sizeof(result->tokens[0]);
+	if (sizeof(result->values) / sizeof(result->values[0]) <
+			(size_t) max_args)
+		max_args = sizeof(result->values) / sizeof(result->values[0]);
+	result->numargs = 0;
+	len = strlen(arg);
+	for (pos = 0; pos < len; pos++) {
+		if (next_starts) {
+			if (result->numargs >= max_args)
+				goto cleanup;
+			if (parsing_token) {
+				result->tokens[result->numargs] = arg + pos;
+			} else {
+				result->values[result->numargs] = arg + pos;
+				result->numargs++;
+			}
+			next_starts = false;
+		}
+		if (*(arg + pos) == '=') {
+			if (parsing_token)
+				parsing_token = false;
+			else
+				goto cleanup;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+		if (*(arg + pos) == ',') {
+			parsing_token = true;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+	}
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+	kfree(result);
+	return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return a structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (e.g. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+/* Open a tap device for transmit and a raw packet socket bound to the
+ * same interface for receive. Returns the descriptor pair, or NULL after
+ * logging the error and closing whatever had been opened.
+ */
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int fd = -1;
+	struct sockaddr_ll sock;
+	int err = -ENOMEM, offload;
+	char *iface;
+	struct vector_fds *result = NULL;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL) {
+		err = -EINVAL;
+		printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+		goto tap_cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+		goto tap_cleanup;
+	}
+	result->rx_fd = -1;
+	result->tx_fd = -1;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+
+	/* TAP */
+
+	fd = open(PATH_NET_TUN, O_RDWR);
+	if (fd < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+		goto tap_cleanup;
+	}
+	result->tx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+
+	if (ioctl(fd, TUNSETIFF, (void *) &ifr) != 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+		goto tap_cleanup;
+	}
+
+	/* best effort: offloads stay off if the host refuses them */
+	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+	ioctl(fd, TUNSETOFFLOAD, offload);
+
+	/* RAW */
+
+	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		err = -errno;
+		printk(UM_KERN_ERR
+			"uml_tap: failed to create socket: %i\n", err);
+		goto tap_cleanup;
+	}
+	result->rx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR
+			"uml_tap: failed to set interface: %i\n", err);
+		goto tap_cleanup;
+	}
+
+	/* zero the fields that are not set below before handing it to bind */
+	memset(&sock, 0, sizeof(sock));
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(fd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR
+			"user_init_tap: failed to bind raw pair, err %d\n",
+				err);
+		goto tap_cleanup;
+	}
+	return result;
+tap_cleanup:
+	printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
+	if (result != NULL) {
+		if (result->rx_fd >= 0)
+			os_close_file(result->rx_fd);
+		if (result->tx_fd >= 0)
+			os_close_file(result->tx_fd);
+		kfree(result);
+	}
+	return NULL;
+}
+
+
+/* Open a pair of raw packet sockets bound to the interface named by the
+ * "ifname" argument, one for receive and one for transmit. Returns the
+ * descriptor pair, or NULL after logging the error and closing both
+ * sockets.
+ */
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int rxfd = -1, txfd = -1;
+	struct sockaddr_ll sock;
+	int err = -ENOMEM;
+	char *iface;
+	struct vector_fds *result = NULL;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL)
+		goto cleanup;
+
+	/* NOTE(review): the protocol is passed without htons() here; the
+	 * bind() below sets it properly. Left unchanged - confirm whether
+	 * that is deliberate.
+	 */
+	rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL);
+	if (rxfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+	if (txfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	/* zero the fields that are not set below before handing it to bind */
+	memset(&sock, 0, sizeof(sock));
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(rxfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_IP);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(txfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		/* do not leak the two sockets opened above */
+		err = -ENOMEM;
+		goto cleanup;
+	}
+	result->rx_fd = rxfd;
+	result->tx_fd = txfd;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+	if (rxfd >= 0)
+		os_close_file(rxfd);
+	if (txfd >= 0)
+		os_close_file(txfd);
+	return NULL;
+}
+
+
+/* Enable PACKET_QDISC_BYPASS on a packet socket; true on success */
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_QDISC_BYPASS,
+		&optval, sizeof(optval)) != 0) {
+		return false;
+	}
+	return true;
+}
+
+/* Enable PACKET_VNET_HDR on a packet socket; logs and returns false on
+ * failure
+ */
+bool uml_raw_enable_vnet_headers(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_VNET_HDR,
+		&optval, sizeof(optval)) != 0) {
+		printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+		return false;
+	}
+	return true;
+}
+
+/* Check that the tap descriptor supports IFF_VNET_HDR and set its vnet
+ * header size to sizeof(struct virtio_net_hdr). Returns false when the
+ * feature query fails or the feature is missing; the result of the size
+ * ioctl itself is not checked.
+ */
+bool uml_tap_enable_vnet_headers(int fd)
+{
+	unsigned int features;
+	int len = sizeof(struct virtio_net_hdr);
+
+	if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+		printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+		return false;
+	}
+	if ((features & IFF_VNET_HDR) == 0) {
+		printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+		return false;
+	}
+	ioctl(fd, TUNSETVNETHDRSZ, &len);
+	return true;
+}
+
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+	int err = -ENOMEM;
+	int fd = -1, gairet;
+	struct addrinfo srchints;
+	struct addrinfo dsthints;
+	bool v6, udp;
+	char *value;
+	char *src, *dst, *srcport, *dstport;
+	struct addrinfo *gairesult = NULL;
+	struct vector_fds *result = NULL;
+
+
+	value = uml_vector_fetch_arg(ifspec, "v6");
+	v6 = false;
+	udp
= false; + if (value != NULL) { + if (strtol((const char *) value, NULL, 10) > 0) + v6 = true; + } + + value = uml_vector_fetch_arg(ifspec, "udp"); + if (value != NULL) { + if (strtol((const char *) value, NULL, 10) > 0) + udp = true; + } + src = uml_vector_fetch_arg(ifspec, "src"); + dst = uml_vector_fetch_arg(ifspec, "dst"); + srcport = uml_vector_fetch_arg(ifspec, "srcport"); + dstport = uml_vector_fetch_arg(ifspec, "dstport"); + + memset(&dsthints, 0, sizeof(dsthints)); + + if (v6) + dsthints.ai_family = AF_INET6; + else + dsthints.ai_family = AF_INET; + + switch (id) { + case ID_GRE: + dsthints.ai_socktype = SOCK_RAW; + dsthints.ai_protocol = IPPROTO_GRE; + break; + case ID_L2TPV3: + if (udp) { + dsthints.ai_socktype = SOCK_DGRAM; + dsthints.ai_protocol = 0; + } else { + dsthints.ai_socktype = SOCK_RAW; + dsthints.ai_protocol = IPPROTO_L2TP; + } + break; + default: + printk(KERN_ERR "Unsupported socket type\n"); + return NULL; + } + memcpy(&srchints, &dsthints, sizeof(struct addrinfo)); + + gairet = getaddrinfo(src, srcport, &dsthints, &gairesult); + if ((gairet != 0) || (gairesult == NULL)) { + printk(UM_KERN_ERR + "socket_open : could not resolve src, error = %s", + gai_strerror(gairet) + ); + return NULL; + } + fd = socket(gairesult->ai_family, + gairesult->ai_socktype, gairesult->ai_protocol); + if (fd == -1) { + printk(UM_KERN_ERR + "socket_open : could not open socket, error = %d", + -errno + ); + goto cleanup; + } + if (bind(fd, + (struct sockaddr *) gairesult->ai_addr, + gairesult->ai_addrlen)) { + printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno); + goto cleanup; + } + + if (gairesult != NULL) + freeaddrinfo(gairesult); + + gairesult = NULL; + + gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult); + if ((gairet != 0) || (gairesult == NULL)) { + printk(UM_KERN_ERR + "socket_open : could not resolve dst, error = %s", + gai_strerror(gairet) + ); + return NULL; + } + + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); + if (result != NULL) { 
+ result->rx_fd = fd; + result->tx_fd = fd; + result->remote_addr = uml_kmalloc( + gairesult->ai_addrlen, UM_GFP_KERNEL); + if (result->remote_addr == NULL) + goto cleanup; + result->remote_addr_size = gairesult->ai_addrlen; + memcpy( + result->remote_addr, + gairesult->ai_addr, + gairesult->ai_addrlen + ); + } + freeaddrinfo(gairesult); + return result; +cleanup: + if (gairesult != NULL) + freeaddrinfo(gairesult); + printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err); + if (fd >= 0) + os_close_file(fd); + if (result != NULL) { + if (result->remote_addr != NULL) + kfree(result->remote_addr); + kfree(result); + } + return NULL; +} + +struct vector_fds *uml_vector_user_open( + int unit, + struct arglist *parsed +) +{ + char *transport; + + if (parsed == NULL) { + printk(UM_KERN_ERR "no parsed config for unit %d\n", unit); + return NULL; + } + transport = uml_vector_fetch_arg(parsed, "transport"); + if (transport == NULL) { + printk(UM_KERN_ERR "missing transport for unit %d\n", unit); + return NULL; + } + if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0) + return user_init_raw_fds(parsed); + if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0) + return user_init_tap_fds(parsed); + if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0) + return user_init_socket_fds(parsed, ID_GRE); + if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0) + return user_init_socket_fds(parsed, ID_L2TPV3); + return NULL; +} + + +int uml_vector_sendmsg(int fd, void *hdr, int flags) +{ + int n; + + CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_recvmsg(int fd, void *hdr, int flags) +{ + int n; + + CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_writev(int fd, void *hdr, int iovcount) +{ + int n; + + 
CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_sendmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags) +{ + int n; + + CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} + +int uml_vector_recvmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags) +{ + int n; + + CATCH_EINTR( + n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0)); + if ((n < 0) && (errno == EAGAIN)) + return 0; + if (n >= 0) + return n; + else + return -errno; +} +int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len) +{ + int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len); + + if (err < 0) + printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno); + return err; +} + +#define DEFAULT_BPF_LEN 6 + +void *uml_vector_default_bpf(int fd, void *mac) +{ + struct sock_filter *bpf; + uint32_t *mac1 = (uint32_t *)(mac + 2); + uint16_t *mac2 = (uint16_t *) mac; + struct sock_fprog bpf_prog = { + .len = 6, + .filter = NULL, + }; + + bpf = uml_kmalloc( + sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL); + if (bpf != NULL) { + bpf_prog.filter = bpf; + /* ld [8] */ + bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 }; + /* jeq #0xMAC[2-6] jt 2 jf 5*/ + bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)}; + /* ldh [6] */ + bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 }; + /* jeq #0xMAC[0-1] jt 4 jf 5 */ + bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)}; + /* ret #0 */ + bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 }; + /* ret #0x40000 */ + bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 }; + if (uml_vector_attach_bpf( + fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) { + kfree(bpf); + bpf = NULL; + } + } + return bpf; +} + diff --git 
a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h new file mode 100644 index 000000000000..d7cbff73b7ff --- /dev/null +++ b/arch/um/drivers/vector_user.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_VECTOR_USER_H +#define __UM_VECTOR_USER_H + +#define MAXVARGS 20 + +#define TOKEN_IFNAME "ifname" + +#define TRANS_RAW "raw" +#define TRANS_RAW_LEN strlen(TRANS_RAW) + +#define TRANS_TAP "tap" +#define TRANS_TAP_LEN strlen(TRANS_TAP) + + +#define TRANS_GRE "gre" +#define TRANS_GRE_LEN strlen(TRANS_RAW) + +#define TRANS_L2TPV3 "l2tpv3" +#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3) + +#ifndef IPPROTO_GRE +#define IPPROTO_GRE 0x2F +#endif + +#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */ +#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */ +#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */ +#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */ + +#define GRE_IRB cpu_to_be16(0x6558) + +#define L2TPV3_DATA_PACKET 0x30000 + +/* IANA-assigned IP protocol ID for L2TPv3 */ + +#ifndef IPPROTO_L2TP +#define IPPROTO_L2TP 0x73 +#endif + +struct arglist { + int numargs; + char *tokens[MAXVARGS]; + char *values[MAXVARGS]; +}; + +/* Separating read and write FDs allows us to have different + * rx and tx method. 
Example - read tap via raw socket using + * recvmmsg, write using legacy tap write calls + */ + +struct vector_fds { + int rx_fd; + int tx_fd; + void *remote_addr; + int remote_addr_size; +}; + +#define VECTOR_READ 1 +#define VECTOR_WRITE (1 < 1) +#define VECTOR_HEADERS (1 < 2) + +extern struct arglist *uml_parse_vector_ifspec(char *arg); + +extern struct vector_fds *uml_vector_user_open( + int unit, + struct arglist *parsed +); + +extern char *uml_vector_fetch_arg( + struct arglist *ifspec, + char *token +); + +extern int uml_vector_recvmsg(int fd, void *hdr, int flags); +extern int uml_vector_sendmsg(int fd, void *hdr, int flags); +extern int uml_vector_writev(int fd, void *hdr, int iovcount); +extern int uml_vector_sendmmsg( + int fd, void *msgvec, + unsigned int vlen, + unsigned int flags +); +extern int uml_vector_recvmmsg( + int fd, + void *msgvec, + unsigned int vlen, + unsigned int flags +); +extern void *uml_vector_default_bpf(int fd, void *mac); +extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len); +extern bool uml_raw_enable_qdisc_bypass(int fd); +extern bool uml_raw_enable_vnet_headers(int fd); +extern bool uml_tap_enable_vnet_headers(int fd); + + +#endif diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..5898a26daa0d --- /dev/null +++ b/arch/um/include/asm/asm-prototypes.h @@ -0,0 +1 @@ +#include <asm-generic/asm-prototypes.h> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index b5cdd3f91157..49ed3e35b35a 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -18,7 +18,19 @@ #define XTERM_IRQ 13 #define RANDOM_IRQ 14 +#ifdef CONFIG_UML_NET_VECTOR + +#define VECTOR_BASE_IRQ 15 +#define VECTOR_IRQ_SPACE 8 + +#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ) + +#else + #define LAST_IRQ RANDOM_IRQ + +#endif + #define NR_IRQS (LAST_IRQ + 1) #endif diff --git a/arch/um/include/shared/irq_user.h 
b/arch/um/include/shared/irq_user.h index df5633053957..a7a6120f19d5 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -7,6 +7,7 @@ #define __IRQ_USER_H__ #include <sysdep/ptrace.h> +#include <stdbool.h> struct irq_fd { struct irq_fd *next; @@ -15,10 +16,17 @@ struct irq_fd { int type; int irq; int events; - int current_events; + bool active; + bool pending; + bool purge; }; -enum { IRQ_READ, IRQ_WRITE }; +#define IRQ_READ 0 +#define IRQ_WRITE 1 +#define IRQ_NONE 2 +#define MAX_IRQ_TYPE (IRQ_NONE + 1) + + struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h index 012ac87d4900..40442b98b173 100644 --- a/arch/um/include/shared/net_kern.h +++ b/arch/um/include/shared/net_kern.h @@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, char **gate_addr); extern void register_transport(struct transport *new); extern unsigned short eth_protocol(struct sk_buff *skb); +extern void uml_net_setup_etheraddr(struct net_device *dev, char *str); + #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index d8ddaf9790d2..048ae37eb5aa 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -290,15 +290,16 @@ extern void halt_skas(void); extern void reboot_skas(void); /* irq.c */ -extern int os_waiting_for_events(struct irq_fd *active_fds); -extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); -extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, - struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); -extern void os_free_irq_later(struct irq_fd *active_fds, - int irq, void *dev_id); -extern int os_get_pollfd(int i); -extern void os_set_pollfd(int i, int fd); +extern int os_waiting_for_events_epoll(void); +extern void *os_epoll_get_data_pointer(int index); 
+extern int os_epoll_triggered(int index, int events); +extern int os_event_mask(int irq_type); +extern int os_setup_epoll(void); +extern int os_add_epoll_fd(int events, int fd, void *data); +extern int os_mod_epoll_fd(int events, int fd, void *data); +extern int os_del_epoll_fd(int fd); extern void os_set_ioignore(void); +extern void os_close_epoll_fd(void); /* sigio.c */ extern int add_sigio_fd(int fd); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 23cb9350d47e..6b7f3827d6e4 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: @@ -16,243 +18,362 @@ #include <as-layout.h> #include <kern_util.h> #include <os.h> +#include <irq_user.h> -/* - * This list is accessed under irq_lock, except in sigio_handler, - * where it is safe from being modified. IRQ handlers won't change it - - * if an IRQ source has vanished, it will be freed by free_irqs just - * before returning from sigio_handler. That will process a separate - * list of irqs to free, with its own locking, coming back here to - * remove list elements, taking the irq_lock to do so. + +/* When epoll triggers we do not know why it did so + * we can also have different IRQs for read and write. 
+ * This is why we keep a small irq_fd array for each fd - + * one entry per IRQ type */ -static struct irq_fd *active_fds = NULL; -static struct irq_fd **last_irq_ptr = &active_fds; -extern void free_irqs(void); +struct irq_entry { + struct irq_entry *next; + int fd; + struct irq_fd *irq_array[MAX_IRQ_TYPE + 1]; +}; + +static struct irq_entry *active_fds; + +static DEFINE_SPINLOCK(irq_lock); + +static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) +{ +/* + * irq->active guards against reentry + * irq->pending accumulates pending requests + * if pending is raised the irq_handler is re-run + * until pending is cleared + */ + if (irq->active) { + irq->active = false; + do { + irq->pending = false; + do_IRQ(irq->irq, regs); + } while (irq->pending && (!irq->purge)); + if (!irq->purge) + irq->active = true; + } else { + irq->pending = true; + } +} void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { - struct irq_fd *irq_fd; - int n; + struct irq_entry *irq_entry; + struct irq_fd *irq; + + int n, i, j; while (1) { - n = os_waiting_for_events(active_fds); + /* This is now lockless - epoll keeps back-references to the irqs + * which have triggered it so there is no need to walk the irq + * list and lock it every time. We avoid locking by turning off + * IO for a specific fd by executing os_del_epoll_fd(fd) before + * we do any changes to the actual data structures + */ + n = os_waiting_for_events_epoll(); + if (n <= 0) { if (n == -EINTR) continue; - else break; + else + break; } - for (irq_fd = active_fds; irq_fd != NULL; - irq_fd = irq_fd->next) { - if (irq_fd->current_events != 0) { - irq_fd->current_events = 0; - do_IRQ(irq_fd->irq, regs); + for (i = 0; i < n ; i++) { + /* Epoll back reference is the entry with 3 irq_fd
+ */ + irq_entry = (struct irq_entry *) + os_epoll_get_data_pointer(i); + for (j = 0; j < MAX_IRQ_TYPE ; j++) { + irq = irq_entry->irq_array[j]; + if (irq == NULL) + continue; + if (os_epoll_triggered(i, irq->events) > 0) + irq_io_loop(irq, regs); + if (irq->purge) { + irq_entry->irq_array[j] = NULL; + kfree(irq); + } } } } +} + +static int assign_epoll_events_to_irq(struct irq_entry *irq_entry) +{ + int i; + int events = 0; + struct irq_fd *irq; - free_irqs(); + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + irq = irq_entry->irq_array[i]; + if (irq != NULL) + events = irq->events | events; + } + if (events > 0) { + /* os_add_epoll will call os_mod_epoll if this already exists */ + return os_add_epoll_fd(events, irq_entry->fd, irq_entry); + } + /* No events - delete */ + return os_del_epoll_fd(irq_entry->fd); } -static DEFINE_SPINLOCK(irq_lock); + static int activate_fd(int irq, int fd, int type, void *dev_id) { - struct pollfd *tmp_pfd; - struct irq_fd *new_fd, *irq_fd; + struct irq_fd *new_fd; + struct irq_entry *irq_entry; + int i, err, events; unsigned long flags; - int events, err, n; err = os_set_fd_async(fd); if (err < 0) goto out; - err = -ENOMEM; - new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL); - if (new_fd == NULL) - goto out; + spin_lock_irqsave(&irq_lock, flags); - if (type == IRQ_READ) - events = UM_POLLIN | UM_POLLPRI; - else events = UM_POLLOUT; - *new_fd = ((struct irq_fd) { .next = NULL, - .id = dev_id, - .fd = fd, - .type = type, - .irq = irq, - .events = events, - .current_events = 0 } ); + /* Check if we have an entry for this fd */ err = -EBUSY; - spin_lock_irqsave(&irq_lock, flags); - for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { - if ((irq_fd->fd == fd) && (irq_fd->type == type)) { - printk(KERN_ERR "Registering fd %d twice\n", fd); - printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); - printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, - dev_id); + for (irq_entry = active_fds; + irq_entry != NULL; irq_entry = 
irq_entry->next) { + if (irq_entry->fd == fd) + break; + } + + if (irq_entry == NULL) { + /* This needs to be atomic as it may be called from an + * IRQ context. + */ + irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC); + if (irq_entry == NULL) { + printk(KERN_ERR + "Failed to allocate new IRQ entry\n"); goto out_unlock; } + irq_entry->fd = fd; + for (i = 0; i < MAX_IRQ_TYPE; i++) + irq_entry->irq_array[i] = NULL; + irq_entry->next = active_fds; + active_fds = irq_entry; } - if (type == IRQ_WRITE) - fd = -1; - - tmp_pfd = NULL; - n = 0; + /* Check if we are trying to re-register an interrupt for a + * particular fd + */ - while (1) { - n = os_create_pollfd(fd, events, tmp_pfd, n); - if (n == 0) - break; + if (irq_entry->irq_array[type] != NULL) { + printk(KERN_ERR + "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n", + irq, fd, type, dev_id + ); + goto out_unlock; + } else { + /* New entry for this fd */ + + err = -ENOMEM; + new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC); + if (new_fd == NULL) + goto out_unlock; - /* - * n > 0 - * It means we couldn't put new pollfd to current pollfds - * and tmp_fds is NULL or too small for new pollfds array. - * Needed size is equal to n as minimum. - * - * Here we have to drop the lock in order to call - * kmalloc, which might sleep. - * If something else came in and changed the pollfds array - * so we will not be able to put new pollfd struct to pollfds - * then we free the buffer tmp_fds and try again. 
+ events = os_event_mask(type); + + *new_fd = ((struct irq_fd) { + .id = dev_id, + .irq = irq, + .type = type, + .events = events, + .active = true, + .pending = false, + .purge = false + }); + /* Turn off any IO on this fd - allows us to + * avoid locking the IRQ loop */ - spin_unlock_irqrestore(&irq_lock, flags); - kfree(tmp_pfd); - - tmp_pfd = kmalloc(n, GFP_KERNEL); - if (tmp_pfd == NULL) - goto out_kfree; - - spin_lock_irqsave(&irq_lock, flags); + os_del_epoll_fd(irq_entry->fd); + irq_entry->irq_array[type] = new_fd; } - *last_irq_ptr = new_fd; - last_irq_ptr = &new_fd->next; - + /* Turn back IO on with the correct (new) IO event mask */ + assign_epoll_events_to_irq(irq_entry); spin_unlock_irqrestore(&irq_lock, flags); - - /* - * This calls activate_fd, so it has to be outside the critical - * section. - */ - maybe_sigio_broken(fd, (type == IRQ_READ)); + maybe_sigio_broken(fd, (type != IRQ_NONE)); return 0; - - out_unlock: +out_unlock: spin_unlock_irqrestore(&irq_lock, flags); - out_kfree: - kfree(new_fd); - out: +out: return err; } -static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) +/* + * Walk the IRQ list and dispose of any unused entries. + * Should be done under irq_lock. 
+ */ + +static void garbage_collect_irq_entries(void) { - unsigned long flags; + int i; + bool reap; + struct irq_entry *walk; + struct irq_entry *previous = NULL; + struct irq_entry *to_free; - spin_lock_irqsave(&irq_lock, flags); - os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); - spin_unlock_irqrestore(&irq_lock, flags); + if (active_fds == NULL) + return; + walk = active_fds; + while (walk != NULL) { + reap = true; + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + if (walk->irq_array[i] != NULL) { + reap = false; + break; + } + } + if (reap) { + if (previous == NULL) + active_fds = walk->next; + else + previous->next = walk->next; + to_free = walk; + } else { + to_free = NULL; + } + walk = walk->next; + if (to_free != NULL) + kfree(to_free); + } } -struct irq_and_dev { - int irq; - void *dev; -}; +/* + * Walk the IRQ list and get the descriptor for our FD + */ -static int same_irq_and_dev(struct irq_fd *irq, void *d) +static struct irq_entry *get_irq_entry_by_fd(int fd) { - struct irq_and_dev *data = d; + struct irq_entry *walk = active_fds; - return ((irq->irq == data->irq) && (irq->id == data->dev)); + while (walk != NULL) { + if (walk->fd == fd) + return walk; + walk = walk->next; + } + return NULL; } -static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) -{ - struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, - .dev = dev }); - free_irq_by_cb(same_irq_and_dev, &data); -} +/* + * Walk the IRQ list and dispose of an entry for a specific + * device, fd and number. Note - if sharing an IRQ for read + * and write for the same FD it will be disposed in either case. + * If this behaviour is undesirable use different IRQ ids.
+ */ -static int same_fd(struct irq_fd *irq, void *fd) -{ - return (irq->fd == *((int *)fd)); -} +#define IGNORE_IRQ 1 +#define IGNORE_DEV (1<<1) -void free_irq_by_fd(int fd) +static void do_free_by_irq_and_dev( + struct irq_entry *irq_entry, + unsigned int irq, + void *dev, + int flags +) { - free_irq_by_cb(same_fd, &fd); + int i; + struct irq_fd *to_free; + + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + if (irq_entry->irq_array[i] != NULL) { + if ( + ((flags & IGNORE_IRQ) || + (irq_entry->irq_array[i]->irq == irq)) && + ((flags & IGNORE_DEV) || + (irq_entry->irq_array[i]->id == dev)) + ) { + /* Turn off any IO on this fd - allows us to + * avoid locking the IRQ loop + */ + os_del_epoll_fd(irq_entry->fd); + to_free = irq_entry->irq_array[i]; + irq_entry->irq_array[i] = NULL; + assign_epoll_events_to_irq(irq_entry); + if (to_free->active) + to_free->purge = true; + else + kfree(to_free); + } + } + } } -/* Must be called with irq_lock held */ -static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) +void free_irq_by_fd(int fd) { - struct irq_fd *irq; - int i = 0; - int fdi; + struct irq_entry *to_free; + unsigned long flags; - for (irq = active_fds; irq != NULL; irq = irq->next) { - if ((irq->fd == fd) && (irq->irq == irqnum)) - break; - i++; - } - if (irq == NULL) { - printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n", - fd); - goto out; - } - fdi = os_get_pollfd(i); - if ((fdi != -1) && (fdi != fd)) { - printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds " - "and pollfds, fd %d vs %d, need %d\n", irq->fd, - fdi, fd); - irq = NULL; - goto out; + spin_lock_irqsave(&irq_lock, flags); + to_free = get_irq_entry_by_fd(fd); + if (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + -1, + NULL, + IGNORE_IRQ | IGNORE_DEV + ); } - *index_out = i; - out: - return irq; + garbage_collect_irq_entries(); + spin_unlock_irqrestore(&irq_lock, flags); } +EXPORT_SYMBOL(free_irq_by_fd); -void reactivate_fd(int fd, int irqnum) +static void 
free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_fd *irq; + struct irq_entry *to_free; unsigned long flags; - int i; spin_lock_irqsave(&irq_lock, flags); - irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) { - spin_unlock_irqrestore(&irq_lock, flags); - return; + to_free = active_fds; + while (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + irq, + dev, + 0 + ); + to_free = to_free->next; } - os_set_pollfd(i, irq->fd); + garbage_collect_irq_entries(); spin_unlock_irqrestore(&irq_lock, flags); +} - add_sigio_fd(fd); + +void reactivate_fd(int fd, int irqnum) +{ + /** NOP - we do auto-EOI now **/ } void deactivate_fd(int fd, int irqnum) { - struct irq_fd *irq; + struct irq_entry *to_free; unsigned long flags; - int i; + os_del_epoll_fd(fd); spin_lock_irqsave(&irq_lock, flags); - irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) { - spin_unlock_irqrestore(&irq_lock, flags); - return; + to_free = get_irq_entry_by_fd(fd); + if (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + irqnum, + NULL, + IGNORE_DEV + ); } - - os_set_pollfd(i, -1); + garbage_collect_irq_entries(); spin_unlock_irqrestore(&irq_lock, flags); - ignore_sigio_fd(fd); } EXPORT_SYMBOL(deactivate_fd); @@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd); */ int deactivate_all_fds(void) { - struct irq_fd *irq; - int err; + unsigned long flags; + struct irq_entry *to_free; - for (irq = active_fds; irq != NULL; irq = irq->next) { - err = os_clear_fd_async(irq->fd); - if (err) - return err; - } - /* If there is a signal already queued, after unblocking ignore it */ + spin_lock_irqsave(&irq_lock, flags); + /* Stop IO. 
The IRQ loop has no lock so this is our + * only way of making sure we are safe to dispose + * of all IRQ handlers + */ os_set_ioignore(); - + to_free = active_fds; + while (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + -1, + NULL, + IGNORE_IRQ | IGNORE_DEV + ); + to_free = to_free->next; + } + garbage_collect_irq_entries(); + spin_unlock_irqrestore(&irq_lock, flags); + os_close_epoll_fd(); return 0; } @@ -353,8 +485,11 @@ void __init init_IRQ(void) irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); + for (i = 1; i < NR_IRQS; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); + /* Initialize EPOLL Loop */ + os_setup_epoll(); } /* diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 7f69d17de354..052de4c8acb2 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -121,12 +121,12 @@ static void __init um_timer_setup(void) clockevents_register_device(&timer_clockevent); } -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { long long nsecs = os_persistent_clock_emulation(); - set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, - nsecs % NSEC_PER_SEC); + set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC, + nsecs % NSEC_PER_SEC); } void __init time_init(void) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 2db18cbbb0ea..c0197097c86e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -12,6 +12,7 @@ #include <sys/mount.h> #include <sys/socket.h> #include <sys/stat.h> +#include <sys/sysmacros.h> #include <sys/un.h> #include <sys/types.h> #include <os.h> diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index b9afb74b79ad..365823010346 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -1,135 +1,147 @@ /* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed 
under the GPL */ #include <stdlib.h> #include <errno.h> -#include <poll.h> +#include <sys/epoll.h> #include <signal.h> #include <string.h> #include <irq_user.h> #include <os.h> #include <um_malloc.h> +/* Epoll support */ + +static int epollfd = -1; + +#define MAX_EPOLL_EVENTS 64 + +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; + +/* Helper to return an Epoll data pointer from an epoll event structure. + * We need to keep this one on the userspace side to keep includes separate + */ + +void *os_epoll_get_data_pointer(int index) +{ + return epoll_events[index].data.ptr; +} + +/* Helper to compare events versus the events in the epoll structure. + * Same as above - needs to be on the userspace side + */ + + +int os_epoll_triggered(int index, int events) +{ + return epoll_events[index].events & events; +} +/* Helper to set the event mask. + * The event mask is opaque to the kernel side, because it does not have + * access to the right includes/defines for EPOLL constants. + */ + +int os_event_mask(int irq_type) +{ + if (irq_type == IRQ_READ) + return EPOLLIN | EPOLLPRI; + if (irq_type == IRQ_WRITE) + return EPOLLOUT; + return 0; +} + /* - * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd - * and os_free_irq_by_cb, which are called under irq_lock. 
+ * Initial Epoll Setup */ -static struct pollfd *pollfds = NULL; -static int pollfds_num = 0; -static int pollfds_size = 0; +int os_setup_epoll(void) +{ + epollfd = epoll_create(MAX_EPOLL_EVENTS); + return epollfd; +} -int os_waiting_for_events(struct irq_fd *active_fds) +/* + * Helper to run the actual epoll_wait + */ +int os_waiting_for_events_epoll(void) { - struct irq_fd *irq_fd; - int i, n, err; + int n, err; - n = poll(pollfds, pollfds_num, 0); + n = epoll_wait(epollfd, + (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0); if (n < 0) { err = -errno; if (errno != EINTR) - printk(UM_KERN_ERR "os_waiting_for_events:" - " poll returned %d, errno = %d\n", n, errno); + printk( + UM_KERN_ERR "os_waiting_for_events:" + " epoll returned %d, error = %s\n", n, + strerror(errno) + ); return err; } - - if (n == 0) - return 0; - - irq_fd = active_fds; - - for (i = 0; i < pollfds_num; i++) { - if (pollfds[i].revents != 0) { - irq_fd->current_events = pollfds[i].revents; - pollfds[i].fd = -1; - } - irq_fd = irq_fd->next; - } return n; } -int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) -{ - if (pollfds_num == pollfds_size) { - if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { - /* return min size needed for new pollfds area */ - return (pollfds_size + 1) * sizeof(pollfds[0]); - } - - if (pollfds != NULL) { - memcpy(tmp_pfd, pollfds, - sizeof(pollfds[0]) * pollfds_size); - /* remove old pollfds */ - kfree(pollfds); - } - pollfds = tmp_pfd; - pollfds_size++; - } else - kfree(tmp_pfd); /* remove not used tmp_pfd */ - - pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); - pollfds_num++; - - return 0; -} -void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, - struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) +/* + * Helper to add a fd to epoll + */ +int os_add_epoll_fd(int events, int fd, void *data) { - struct irq_fd **prev; - int i = 0; - - prev = &active_fds; - while 
(*prev != NULL) { - if ((*test)(*prev, arg)) { - struct irq_fd *old_fd = *prev; - if ((pollfds[i].fd != -1) && - (pollfds[i].fd != (*prev)->fd)) { - printk(UM_KERN_ERR "os_free_irq_by_cb - " - "mismatch between active_fds and " - "pollfds, fd %d vs %d\n", - (*prev)->fd, pollfds[i].fd); - goto out; - } - - pollfds_num--; - - /* - * This moves the *whole* array after pollfds[i] - * (though it doesn't spot as such)! - */ - memmove(&pollfds[i], &pollfds[i + 1], - (pollfds_num - i) * sizeof(pollfds[0])); - if (*last_irq_ptr2 == &old_fd->next) - *last_irq_ptr2 = prev; - - *prev = (*prev)->next; - if (old_fd->type == IRQ_WRITE) - ignore_sigio_fd(old_fd->fd); - kfree(old_fd); - continue; - } - prev = &(*prev)->next; - i++; - } - out: - return; + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events | EPOLLET; + result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event); + if ((result) && (errno == EEXIST)) + result = os_mod_epoll_fd(events, fd, data); + if (result) + printk("epollctl add err fd %d, %s\n", fd, strerror(errno)); + return result; } -int os_get_pollfd(int i) +/* + * Helper to mod the fd event mask and/or data backreference + */ +int os_mod_epoll_fd(int events, int fd, void *data) { - return pollfds[i].fd; + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events; + result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event); + if (result) + printk(UM_KERN_ERR + "epollctl mod err fd %d, %s\n", fd, strerror(errno)); + return result; } -void os_set_pollfd(int i, int fd) +/* + * Helper to delete the epoll fd + */ +int os_del_epoll_fd(int fd) { - pollfds[i].fd = fd; + struct epoll_event event; + int result; + /* This is quiet as we use this as IO ON/OFF - so it is often + * invoked on a non-existent fd + */ + result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event); + return result; } void os_set_ioignore(void) { signal(SIGIO, SIG_IGN); } + +void os_close_epoll_fd(void) +{ + /* Needed so we do not leak an 
fd when rebooting */ + os_close_file(epollfd); +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index a86d7cc2c2d8..bf0acb8aad8b 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -16,6 +16,7 @@ #include <os.h> #include <sysdep/mcontext.h> #include <um_malloc.h> +#include <sys/ucontext.h> void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGTRAP] = relay_signal, @@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { static void hard_handler(int sig, siginfo_t *si, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; unsigned long pending = 1UL << sig; diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h index a5e08e2d5d6d..1d9132b66039 100644 --- a/arch/unicore32/include/asm/cacheflush.h +++ b/arch/unicore32/include/asm/cacheflush.h @@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -#define flush_dcache_mmap_lock(mapping) \ - spin_lock_irq(&(mapping)->tree_lock) -#define flush_dcache_mmap_unlock(mapping) \ - spin_unlock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_user_range(vma, page, addr, len) \ flush_dcache_page(page) diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index 3bb0a29fd2d7..66bb9f6525c0 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -20,12 +20,6 @@ #include <mach/memory.h> /* - * Allow for constants defined here to be used from assembly code - * by prepending the UL suffix only with actual C code compilation. 
- */ -#define UL(x) _AC(x, UL) - -/* * PAGE_OFFSET - the virtual address of the start of the kernel image * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 199e15bd3ec5..ce8b4da07e35 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -122,12 +122,14 @@ struct x86_init_pci { * @guest_late_init: guest late init * @x2apic_available: X2APIC detection * @init_mem_mapping: setup early mappings during init_mem_mapping() + * @init_after_bootmem: guest init after boot allocator is finished */ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); void (*init_mem_mapping)(void); + void (*init_after_bootmem)(void); }; /** diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index df92605d8724..14c057f29979 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void) * new fields are handled in copy_siginfo_to_user32()! 
*/ BUILD_BUG_ON(NSIGILL != 11); - BUILD_BUG_ON(NSIGFPE != 14); + BUILD_BUG_ON(NSIGFPE != 15); BUILD_BUG_ON(NSIGSEGV != 7); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ebda84a91510..3ab867603e81 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = { .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, + .init_after_bootmem = x86_init_noop, }, .acpi = { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 396e1f0151ac..8008db2bddb3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -778,6 +778,7 @@ void __init mem_init(void) free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dca9abf2b85c..66de40e45f58 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1185,6 +1185,7 @@ void __init mem_init(void) /* this will put all memory onto the freelists */ free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); /* * Must be done after boot memory is put on freelist, because here we diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 155ecbac9e28..48c591251600 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void) return arch_rnd(mmap_is_ia32() ? 
mmap32_rnd_bits : mmap64_rnd_bits); } -static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) +static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, + struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long gap_min, gap_max; @@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd, * process VM image, sets up which VM layout function to use: */ static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, - unsigned long random_factor, unsigned long task_size) + unsigned long random_factor, unsigned long task_size, + struct rlimit *rlim_stack) { *legacy_base = mmap_legacy_base(random_factor, task_size); if (mmap_is_legacy()) *base = *legacy_base; else - *base = mmap_base(random_factor, task_size); + *base = mmap_base(random_factor, task_size, rlim_stack); } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { if (mmap_is_legacy()) mm->get_unmapped_area = arch_get_unmapped_area; @@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, - arch_rnd(mmap64_rnd_bits), task_size_64bit(0)); + arch_rnd(mmap64_rnd_bits), task_size_64bit(0), + rlim_stack); #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* @@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * mmap_base, the compat syscall uses mmap_compat_base. 
*/ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, - arch_rnd(mmap32_rnd_bits), task_size_32bit()); + arch_rnd(mmap32_rnd_bits), task_size_32bit(), + rlim_stack); #endif } diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 1518d2805ae8..27361cbb7ca9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -6,11 +6,12 @@ #include <sysdep/stub.h> #include <sysdep/faultinfo.h> #include <sysdep/mcontext.h> +#include <sys/ucontext.h> void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), &uc->uc_mcontext); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3c2c2530737e..c36d23aa6c35 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ __userpte_alloc_gfp &= ~__GFP_HIGHMEM; - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Get mfn list */ xen_build_dynamic_phys_to_machine(); @@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void) */ xen_setup_gdt(0); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + xen_init_irq_ops(); /* Let's presume PV guests always boot on vCPU with id 0. */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index d20763472920..486c0a34d00b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ static phys_addr_t xen_pt_base, xen_pt_size __initdata; +static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready); + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. 
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr) } +/* + * During early boot all page table pages are pinned, but we do not have struct + * pages, so return true until struct pages are ready. + */ static bool xen_page_pinned(void *ptr) { - struct page *page = virt_to_page(ptr); + if (static_branch_likely(&xen_struct_pages_ready)) { + struct page *page = virt_to_page(ptr); - return PagePinned(page); + return PagePinned(page); + } + return true; } static void xen_extend_mmu_update(const struct mmu_update *update) @@ -836,11 +845,6 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -/* - * The init_mm pagetable is really pinned as soon as its created, but - * that's before we have page structures to store the bits. So do all - * the book-keeping now. - */ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, enum pt_level level) { @@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -static void __init xen_mark_init_mm_pinned(void) +/* + * The init_mm pagetable is really pinned as soon as its created, but + * that's before we have page structures to store the bits. So do all + * the book-keeping now once struct pages for allocated pages are + * initialized. This happens only after free_all_bootmem() is called. 
+ */ +static void __init xen_after_bootmem(void) { + static_branch_enable(&xen_struct_pages_ready); +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) { - bool pinned = PagePinned(virt_to_page(mm->pgd)); + bool pinned = xen_page_pinned(mm->pgd); trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); if (pinned) { struct page *page = pfn_to_page(pfn); - SetPagePinned(page); + if (static_branch_likely(&xen_struct_pages_ready)) + SetPagePinned(page); if (!PageHighMem(page)) { xen_mc_batch(); @@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void) #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; - SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif - xen_mark_init_mm_pinned(); } static void xen_leave_lazy_mmu(void) @@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; + x86_init.hyper.init_after_bootmem = xen_after_bootmem; pv_mmu_ops = xen_mmu_ops; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c0c756c76afe..2e20ae2fa2d6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ * data back is to call: */ tick_nohz_idle_enter(); + tick_nohz_idle_stop_tick_protected(); cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 96f26e026783..5077ead5e59c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -89,7 +89,9 @@ END(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, - .long (1 
<< XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0)) + .long (1 << XENFEAT_writable_page_tables) | \ + (1 << XENFEAT_dom0) | \ + (1 << XENFEAT_linux_rsdp_unrestricted)) ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 3e9d01ada81f..58f29a9d895d 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -57,6 +57,7 @@ #define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED # define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ |