From 40c42076ebd362dc69210cccea101ac80b6d4bd4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 12 Aug 2008 17:52:51 -0500 Subject: lguest: don't set MAC address for guest unless specified This shows up when trying to bridge: tap0: received packet with own address as source address As Max Krasnyansky points out, there's no reason to give the guest the same mac address as the TUN device. Signed-off-by: Rusty Russell Cc: Max Krasnyansky --- Documentation/lguest/lguest.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index b88b0ea54e90..655414821edc 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1447,21 +1447,6 @@ static void configure_device(int fd, const char *tapif, u32 ipaddr) err(1, "Bringing interface %s up", tapif); } -static void get_mac(int fd, const char *tapif, unsigned char hwaddr[6]) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - strcpy(ifr.ifr_name, tapif); - - /* SIOC stands for Socket I/O Control. G means Get (vs S for Set - * above). IF means Interface, and HWADDR is hardware address. - * Simple! */ - if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) - err(1, "getting hw address for %s", tapif); - memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); -} - static int get_tun_device(char tapif[IFNAMSIZ]) { struct ifreq ifr; @@ -1531,11 +1516,8 @@ static void setup_tun_net(char *arg) p = strchr(arg, ':'); if (p) { str2mac(p+1, conf.mac); + add_feature(dev, VIRTIO_NET_F_MAC); *p = '\0'; - } else { - p = arg + strlen(arg); - /* None supplied; query the randomly assigned mac. */ - get_mac(ipfd, tapif, conf.mac); } /* arg is now either an IP address or a bridge name */ @@ -1547,13 +1529,10 @@ static void setup_tun_net(char *arg) /* Set up the tun device. */ configure_device(ipfd, tapif, ip); - /* Tell Guest what MAC address to use. */ - add_feature(dev, VIRTIO_NET_F_MAC); add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); /* Expect Guest to handle everything except UFO */ add_feature(dev, VIRTIO_NET_F_CSUM); add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); - add_feature(dev, VIRTIO_NET_F_MAC); add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); add_feature(dev, VIRTIO_NET_F_GUEST_ECN); -- cgit v1.2.1 From 912985dce45ef18fcdd9f5439fef054e0e22302a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 12 Aug 2008 17:52:52 -0500 Subject: mm: Make generic weak get_user_pages_fast and EXPORT_GPL it Out of line get_user_pages_fast fallback implementation, make it a weak symbol, get rid of CONFIG_HAVE_GET_USER_PAGES_FAST. Export the symbol to modules so lguest can use it. Signed-off-by: Nick Piggin Signed-off-by: Rusty Russell --- arch/powerpc/Kconfig | 3 --- arch/x86/Kconfig | 1 - arch/x86/mm/Makefile | 3 +-- include/linux/mm.h | 20 -------------------- mm/Kconfig | 3 --- mm/util.c | 15 +++++++++++++++ 6 files changed, 16 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 63c9cafda9c4..587da5e0990f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -42,9 +42,6 @@ config GENERIC_HARDIRQS bool default y -config HAVE_GET_USER_PAGES_FAST - def_bool PPC64 - config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3d0f2b6a5a16..ac2fb0641a04 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,7 +22,6 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_IOREMAP_PROT - select HAVE_GET_USER_PAGES_FAST select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_KRETPROBES diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 2977ea37791f..dfb932dcf136 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,6 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o + pat.o pgtable.o gup.o -obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o obj-$(CONFIG_X86_32) += pgtable_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/include/linux/mm.h b/include/linux/mm.h index 335288bff1b7..fa651609b65d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -834,7 +834,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); -#ifdef CONFIG_HAVE_GET_USER_PAGES_FAST /* * get_user_pages_fast provides equivalent functionality to get_user_pages, * operating on current and current->mm (force=0 and doesn't return any vmas). @@ -848,25 +847,6 @@ extern int mprotect_fixup(struct vm_area_struct *vma, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -#else -/* - * Should probably be moved to asm-generic, and architectures can include it if - * they don't implement their own get_user_pages_fast. - */ -#define get_user_pages_fast(start, nr_pages, write, pages) \ -({ \ - struct mm_struct *mm = current->mm; \ - int ret; \ - \ - down_read(&mm->mmap_sem); \ - ret = get_user_pages(current, mm, start, nr_pages, \ - write, 0, pages, NULL); \ - up_read(&mm->mmap_sem); \ - \ - ret; \ -}) -#endif - /* * A callback you can register to apply pressure to ageable caches. * diff --git a/mm/Kconfig b/mm/Kconfig index 446c6588c753..0bd9c2dbb2a0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -77,9 +77,6 @@ config FLAT_NODE_MEM_MAP def_bool y depends on !SPARSEMEM -config HAVE_GET_USER_PAGES_FAST - bool - # # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's # to represent different areas of memory. This variable allows diff --git a/mm/util.c b/mm/util.c index 9341ca77bd88..cb00b748ce47 100644 --- a/mm/util.c +++ b/mm/util.c @@ -171,3 +171,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->unmap_area = arch_unmap_area; } #endif + +int __attribute__((weak)) get_user_pages_fast(unsigned long start, + int nr_pages, int write, struct page **pages) +{ + struct mm_struct *mm = current->mm; + int ret; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, nr_pages, + write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(get_user_pages_fast); -- cgit v1.2.1 From 71a3f4edc11b9dd7af28d003acbbd33496003da1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 12 Aug 2008 17:52:53 -0500 Subject: lguest: use get_user_pages_fast() instead of get_user_pages() Using a simple page table thrashing program I measure a slight improvement. The program creates five processes. Each touches 1000 pages then schedules the next process. We repeat this 1000 times. As lguest only caches 4 cr3 values, this rebuilds a lot of shadow page tables requiring virt->phys mappings. Before: 5.93 seconds After: 5.40 seconds (Counts of slow vs fastpath in this usage are 6092 and 2852462 respectively.) And more importantly for lguest, the code is simpler. Signed-off-by: Rusty Russell --- drivers/lguest/page_tables.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index d93500f24fbb..81d0c6053447 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -108,9 +108,8 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) } /*:*/ -/*M:014 get_pfn is slow; it takes the mmap sem and calls get_user_pages. We - * could probably try to grab batches of pages here as an optimization - * (ie. pre-faulting). :*/ +/*M:014 get_pfn is slow: we could probably try to grab batches of pages here as + * an optimization (ie. pre-faulting). :*/ /*H:350 This routine takes a page number given by the Guest and converts it to * an actual, physical page number. It can fail for several reasons: the @@ -123,19 +122,13 @@ static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) static unsigned long get_pfn(unsigned long virtpfn, int write) { struct page *page; - /* This value indicates failure. */ - unsigned long ret = -1UL; - /* get_user_pages() is a complex interface: it gets the "struct - * vm_area_struct" and "struct page" assocated with a range of pages. - * It also needs the task's mmap_sem held, and is not very quick. - * It returns the number of pages it got. */ - down_read(¤t->mm->mmap_sem); - if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT, - 1, write, 1, &page, NULL) == 1) - ret = page_to_pfn(page); - up_read(¤t->mm->mmap_sem); - return ret; + /* gup me one page at this address please! */ + if (get_user_pages_fast(virtpfn << PAGE_SHIFT, 1, write, &page) == 1) + return page_to_pfn(page); + + /* This value indicates failure. */ + return -1UL; } /*H:340 Converting a Guest page table entry to a shadow (ie. real) page table @@ -174,7 +167,7 @@ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) /*H:460 And to complete the chain, release_pte() looks like this: */ static void release_pte(pte_t pte) { - /* Remember that get_user_pages() took a reference to the page, in + /* Remember that get_user_pages_fast() took a reference to the page, in * get_pfn()? We have to put it back now. */ if (pte_flags(pte) & _PAGE_PRESENT) put_page(pfn_to_page(pte_pfn(pte))); -- cgit v1.2.1 From 4bceba417a795b78a5146e3f85291cb7bb2402ef Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 8 Aug 2008 11:15:07 +0200 Subject: export virtio_rng.h Hello Rusty, The entropy device was added after we exported all virtio headers. This patch adds virtio_rng.h to the exportable userspace headers. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/Kbuild b/include/linux/Kbuild index a26f565e8189..327f60658d94 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -356,6 +356,7 @@ unifdef-y += virtio_balloon.h unifdef-y += virtio_console.h unifdef-y += virtio_pci.h unifdef-y += virtio_ring.h +unifdef-y += virtio_rng.h unifdef-y += vt.h unifdef-y += wait.h unifdef-y += wanrouter.h -- cgit v1.2.1 From 59f9415ffb9759e950d775f4c400f747b332cc02 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Jul 2008 12:49:02 -0700 Subject: modules: extend initcall_debug functionality to the module loader The kernel has this really nice facility where if you put "initcall_debug" on the kernel commandline, it'll print which function it's going to execute just before calling an initcall, and then after the call completes it will 1) print if it had an error code 2) checks for a few simple bugs (like leaving irqs off) and 3) print how long the init call took in milliseconds. While trying to optimize the boot speed of my laptop, I have been loving number 3 to figure out what to optimize... ... and then I wished that the same thing was done for module loading. This patch makes the module loader use this exact same functionality; it's a logical extension in my view (since modules are just sort of late binding initcalls anyway) and so far I've found it quite useful in finding where things are too slow in my boot. Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- include/linux/init.h | 1 + init/main.c | 6 ++++-- kernel/module.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index 11b84e106053..93538b696e3d 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -139,6 +139,7 @@ extern initcall_t __con_initcall_start[], __con_initcall_end[]; extern initcall_t __security_initcall_start[], __security_initcall_end[]; /* Defined in init/main.c */ +extern int do_one_initcall(initcall_t fn); extern char __initdata boot_command_line[]; extern char *saved_command_line; extern unsigned int reset_devices; diff --git a/init/main.c b/init/main.c index 0bc7e167bf45..f6f7042331dc 100644 --- a/init/main.c +++ b/init/main.c @@ -691,7 +691,7 @@ asmlinkage void __init start_kernel(void) rest_init(); } -static int __initdata initcall_debug; +static int initcall_debug; static int __init initcall_debug_setup(char *str) { @@ -700,7 +700,7 @@ static int __init initcall_debug_setup(char *str) } __setup("initcall_debug", initcall_debug_setup); -static void __init do_one_initcall(initcall_t fn) +int do_one_initcall(initcall_t fn) { int count = preempt_count(); ktime_t t0, t1, delta; @@ -740,6 +740,8 @@ static void __init do_one_initcall(initcall_t fn) print_fn_descriptor_symbol(KERN_WARNING "initcall %s", fn); printk(" returned with %s\n", msgbuf); } + + return result; } diff --git a/kernel/module.c b/kernel/module.c index 61d212120df4..08864d257eb0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2288,7 +2288,7 @@ sys_init_module(void __user *umod, /* Start the module */ if (mod->init != NULL) - ret = mod->init(); + ret = do_one_initcall(mod->init); if (ret < 0) { /* Init routine failed: abort. Try to protect us from buggy refcounters. */ -- cgit v1.2.1 From ed6d68763b8b589c0ae9d231cbd72bd01f6685c5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 31 Jul 2008 10:31:02 +0800 Subject: stop_machine: remove unused variable Signed-off-by: Li Zefan Signed-off-by: Rusty Russell --- kernel/stop_machine.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index e446c7c7d6a9..af3c7cea258b 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -65,7 +65,6 @@ static void ack_state(void) static int stop_cpu(struct stop_machine_data *smdata) { enum stopmachine_state curstate = STOPMACHINE_NONE; - int uninitialized_var(ret); /* Simple state machine */ do { -- cgit v1.2.1 From b1b135c8d619cb2c7045d6ee4e48375882518bb5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 7 Aug 2008 09:18:34 +0200 Subject: fix spinlock recursion in hvc_console commit 611e097d7707741a336a0677d9d69bec40f29f3d Author: Christian Borntraeger hvc_console: rework setup to replace irq functions with callbacks introduced a spinlock recursion problem. request_irq tries to call the handler if the IRQ is shared. The irq handler of hvc_console calls hvc_poll and hvc_kill which might take the hvc_struct spinlock. Therefore, we have to call request_irq outside the spinlock. We can move the notifier_add safely outside the spinlock as ->data must not be changed by the backend. Otherwise, tty_hangup would fail anyway. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/char/hvc_console.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 02aac104842d..fd64137b1ab9 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -322,11 +322,10 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) hp->tty = tty; - if (hp->ops->notifier_add) - rc = hp->ops->notifier_add(hp, hp->data); - spin_unlock_irqrestore(&hp->lock, flags); + if (hp->ops->notifier_add) + rc = hp->ops->notifier_add(hp, hp->data); /* * If the notifier fails we return an error. The tty layer -- cgit v1.2.1