diff options
Diffstat (limited to 'arch')
53 files changed, 1020 insertions, 1080 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7779f2d1acad..299bc0468702 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -53,7 +53,7 @@ tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 tune-$(CONFIG_CPU_XSCALE) :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_V6) :=-mtune=strongarm +tune-$(CONFIG_CPU_V6) :=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) # Need -Uarm for gcc < 3.x CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 835d450797a1..7b17a87a3311 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -45,8 +45,8 @@ extern void fp_enter(void); #define EXPORT_SYMBOL_ALIAS(sym,orig) \ EXPORT_CRC_ALIAS(sym) \ - const struct kernel_symbol __ksymtab_##sym \ - __attribute__((section("__ksymtab"))) = \ + static const struct kernel_symbol __ksymtab_##sym \ + __attribute_used__ __attribute__((section("__ksymtab"))) = \ { (unsigned long)&orig, #sym }; /* diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 81d450ac3fab..066597f4345a 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -106,15 +106,10 @@ ENTRY(ret_from_fork) .endm .Larm700bug: - ldr r0, [sp, #S_PSR] @ Get calling cpsr - sub lr, lr, #4 - str lr, [r8] - msr spsr_cxsf, r0 ldmia sp, {r0 - lr}^ @ Get calling r0 - lr mov r0, r0 - ldr lr, [sp, #S_PC] @ Get PC add sp, sp, #S_FRAME_SIZE - movs pc, lr + subs pc, lr, #4 #else .macro arm710_bug_check, instr, temp .endm diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 08e58ecd44be..0d5db5279c5c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -89,13 +89,6 @@ SECTIONS *(.got) /* Global offset table */ } - . = ALIGN(16); - __ex_table : { /* Exception table */ - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - RODATA _etext = .; /* End of text and rodata section */ @@ -138,6 +131,14 @@ SECTIONS *(.data.cacheline_aligned) /* + * The exception fixup table (might need resorting at runtime) + */ + . = ALIGN(32); + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + + /* * and the usual data section */ *(.data) diff --git a/arch/arm/mach-l7200/core.c b/arch/arm/mach-l7200/core.c index 5fd8c9f97f9a..03ed742ae2be 100644 --- a/arch/arm/mach-l7200/core.c +++ b/arch/arm/mach-l7200/core.c @@ -7,11 +7,17 @@ */ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/device.h> +#include <asm/types.h> +#include <asm/irq.h> +#include <asm/mach-types.h> #include <asm/hardware.h> #include <asm/page.h> +#include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/mach/irq.h> /* * IRQ base register @@ -47,6 +53,12 @@ static void l7200_unmask_irq(unsigned int irq) { IRQ_ENABLE = 1 << irq; } + +static struct irqchip l7200_irq_chip = { + .ack = l7200_mask_irq, + .mask = l7200_mask_irq, + .unmask = l7200_unmask_irq +}; static void __init l7200_init_irq(void) { @@ -56,11 +68,9 @@ static void __init l7200_init_irq(void) FIQ_ENABLECLEAR = 0xffffffff; /* clear all fast interrupt enables */ for (irq = 0; irq < NR_IRQS; irq++) { - irq_desc[irq].valid = 1; - irq_desc[irq].probe_ok = 1; - irq_desc[irq].mask_ack = l7200_mask_irq; - irq_desc[irq].mask = l7200_mask_irq; - irq_desc[irq].unmask = l7200_unmask_irq; + set_irq_chip(irq, &l7200_irq_chip); + set_irq_flags(irq, IRQF_VALID); + set_irq_handler(irq, do_level_IRQ); } init_FIQ(); diff --git a/arch/arm/mach-pxa/corgi_lcd.c b/arch/arm/mach-pxa/corgi_lcd.c index c02ef7c0f7ef..850538fadece 100644 --- a/arch/arm/mach-pxa/corgi_lcd.c +++ b/arch/arm/mach-pxa/corgi_lcd.c @@ -467,6 +467,7 @@ void corgi_put_hsync(void) { if (get_hsync_time) symbol_put(w100fb_get_hsynclen); + get_hsync_time = NULL; } void corgi_wait_hsync(void) @@ -476,20 +477,37 @@ void corgi_wait_hsync(void) #endif #ifdef CONFIG_PXA_SHARP_Cxx00 +static struct device *spitz_pxafb_dev; + +static int is_pxafb_device(struct device * dev, void * data) +{ + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + + return (strncmp(pdev->name, "pxa2xx-fb", 9) == 0); +} + unsigned long spitz_get_hsync_len(void) { + if (!spitz_pxafb_dev) { + spitz_pxafb_dev = bus_find_device(&platform_bus_type, NULL, NULL, is_pxafb_device); + if (!spitz_pxafb_dev) + return 0; + } if (!get_hsync_time) get_hsync_time = symbol_get(pxafb_get_hsync_time); if (!get_hsync_time) return 0; - return pxafb_get_hsync_time(&pxafb_device.dev); + return pxafb_get_hsync_time(spitz_pxafb_dev); } void spitz_put_hsync(void) { + put_device(spitz_pxafb_dev); if (get_hsync_time) symbol_put(pxafb_get_hsync_time); + spitz_pxafb_dev = NULL; + get_hsync_time = NULL; } void spitz_wait_hsync(void) diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index d0660a8c4b70..d327c127eddb 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -208,6 +208,11 @@ static struct platform_device pxafb_device = { .resource = pxafb_resources, }; +void __init set_pxa_fb_parent(struct device *parent_dev) +{ + pxafb_device.dev.parent = parent_dev; +} + static struct platform_device ffuart_device = { .name = "pxa2xx-uart", .id = 0, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 568afe3d6e1a..d0ab428c2d7d 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -36,7 +36,6 @@ #include <asm/arch/irq.h> #include <asm/arch/mmc.h> #include <asm/arch/udc.h> -#include <asm/arch/ohci.h> #include <asm/arch/pxafb.h> #include <asm/arch/akita.h> #include <asm/arch/spitz.h> @@ -304,7 +303,6 @@ static struct platform_device *devices[] __initdata = { &spitzkbd_device, &spitzts_device, &spitzbl_device, - &spitzbattery_device, }; static void __init common_init(void) @@ -328,7 +326,7 @@ static void __init common_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_mci_info(&spitz_mci_platform_data); - pxafb_device.dev.parent = &spitzssp_device.dev; + set_pxa_fb_parent(&spitzssp_device.dev); set_pxa_fb_info(&spitz_pxafb_info); } diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig index 06807c6ee68a..c796bcdd6158 100644 --- a/arch/arm/mach-s3c2410/Kconfig +++ b/arch/arm/mach-s3c2410/Kconfig @@ -12,6 +12,7 @@ config MACH_ANUBIS config ARCH_BAST bool "Simtec Electronics BAST (EB2410ITX)" select CPU_S3C2410 + select ISA help Say Y here if you are using the Simtec Electronics EB2410ITX development board (also known as BAST) diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c index 7c05f27fe1d6..5ae80f4e3e67 100644 --- a/arch/arm/mach-s3c2410/mach-anubis.c +++ b/arch/arm/mach-s3c2410/mach-anubis.c @@ -125,7 +125,7 @@ static int external_map[] = { 2 }; static int chip0_map[] = { 0 }; static int chip1_map[] = { 1 }; -struct mtd_partition anubis_default_nand_part[] = { +static struct mtd_partition anubis_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index ed1f07d7252f..8ca955984645 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -230,7 +230,7 @@ static int chip0_map[] = { 1 }; static int chip1_map[] = { 2 }; static int chip2_map[] = { 3 }; -struct mtd_partition bast_default_nand_part[] = { +static struct mtd_partition bast_default_nand_part[] = { [0] = { .name = "Boot Agent", .size = SZ_16K, @@ -340,7 +340,7 @@ static struct resource bast_dm9k_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data bast_dm9k_platdata = { +static struct dm9000_plat_data bast_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY }; diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c index 663a7f98fc0b..46b259673c18 100644 --- a/arch/arm/mach-s3c2410/mach-vr1000.c +++ b/arch/arm/mach-s3c2410/mach-vr1000.c @@ -288,7 +288,7 @@ static struct resource vr1000_dm9k1_resource[] = { * better IO routines can be written and tested */ -struct dm9000_plat_data vr1000_dm9k_platdata = { +static struct dm9000_plat_data vr1000_dm9k_platdata = { .flags = DM9000_PLATF_16BITONLY, }; diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index 0b88993dfd27..a8bf5ec82602 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -125,9 +125,6 @@ static struct platform_device *uart_devices[] __initdata = { &s3c_uart2 }; -/* store our uart devices for the serial driver console */ -struct platform_device *s3c2410_uart_devices[3]; - static int s3c2410_uart_count = 0; /* uart registration process */ diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c index d4c8281b55f6..833fa36bce05 100644 --- a/arch/arm/mach-s3c2410/s3c2440.c +++ b/arch/arm/mach-s3c2410/s3c2440.c @@ -151,7 +151,7 @@ void __init s3c2440_init_uarts(struct s3c2410_uartcfg *cfg, int no) #ifdef CONFIG_PM -struct sleep_save s3c2440_sleep[] = { +static struct sleep_save s3c2440_sleep[] = { SAVE_ITEM(S3C2440_DSC0), SAVE_ITEM(S3C2440_DSC1), SAVE_ITEM(S3C2440_GPJDAT), @@ -260,7 +260,7 @@ void __init s3c2440_init_clocks(int xtal) * as a driver which may support both 2410 and 2440 may try and use it. */ -int __init s3c2440_core_init(void) +static int __init s3c2440_core_init(void) { return sysdev_class_register(&s3c2440_sysclass); } diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c index c0acfb2ad790..8a00e3c3cd08 100644 --- a/arch/arm/mach-s3c2410/time.c +++ b/arch/arm/mach-s3c2410/time.c @@ -38,6 +38,7 @@ #include <asm/hardware/clock.h> #include "clock.h" +#include "cpu.h" static unsigned long timer_startval; static unsigned long timer_usec_ticks; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index f35e69e9c65c..705c98921c37 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -111,7 +111,7 @@ proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, } static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { char mode; @@ -119,7 +119,7 @@ static int proc_alignment_write(struct file *file, const char __user *buffer, if (get_user(mode, buffer)) return -EFAULT; if (mode >= '0' && mode <= '5') - ai_usermode = mode - '0'; + ai_usermode = mode - '0'; } return count; } @@ -262,7 +262,7 @@ union offset_union { goto fault; \ } while (0) -#define put32_unaligned_check(val,addr) \ +#define put32_unaligned_check(val,addr) \ __put32_unaligned_check("strb", val, addr) #define put32t_unaligned_check(val,addr) \ @@ -306,19 +306,19 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r return TYPE_LDST; user: - if (LDST_L_BIT(instr)) { - unsigned long val; - get16t_unaligned_check(val, addr); + if (LDST_L_BIT(instr)) { + unsigned long val; + get16t_unaligned_check(val, addr); - /* signed half-word? */ - if (instr & 0x40) - val = (signed long)((signed short) val); + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); - regs->uregs[rd] = val; - } else - put16t_unaligned_check(regs->uregs[rd], addr); + regs->uregs[rd] = val; + } else + put16t_unaligned_check(regs->uregs[rd], addr); - return TYPE_LDST; + return TYPE_LDST; fault: return TYPE_FAULT; @@ -342,11 +342,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32_unaligned_check(val, addr); regs->uregs[rd] = val; - get32_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32_unaligned_check(regs->uregs[rd], addr); - put32_unaligned_check(regs->uregs[rd+1], addr+4); + put32_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -356,11 +356,11 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, unsigned long val; get32t_unaligned_check(val, addr); regs->uregs[rd] = val; - get32t_unaligned_check(val, addr+4); - regs->uregs[rd+1] = val; + get32t_unaligned_check(val, addr + 4); + regs->uregs[rd + 1] = val; } else { put32t_unaligned_check(regs->uregs[rd], addr); - put32t_unaligned_check(regs->uregs[rd+1], addr+4); + put32t_unaligned_check(regs->uregs[rd + 1], addr + 4); } return TYPE_LDST; @@ -443,7 +443,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg if (LDST_P_EQ_U(instr)) /* U = P */ eaddr += 4; - /* + /* * For alignment faults on the ARM922T/ARM920T the MMU makes * the FSR (and hence addr) equal to the updated base address * of the multiple access rather than the restored value. @@ -570,7 +570,7 @@ thumb2arm(u16 tinstr) /* 6.5.1 Format 3: */ case 0x4800 >> 11: /* 7.1.28 LDR(3) */ /* NOTE: This case is not technically possible. We're - * loading 32-bit memory data via PC relative + * loading 32-bit memory data via PC relative * addressing mode. So we can and should eliminate * this case. But I'll leave it here for now. */ @@ -642,7 +642,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (fault) { type = TYPE_FAULT; - goto bad_or_fault; + goto bad_or_fault; } if (user_mode(regs)) diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c index 7690f731ee87..7b3d74d73c80 100644 --- a/arch/arm/nwfpe/fpa11.c +++ b/arch/arm/nwfpe/fpa11.c @@ -31,11 +31,6 @@ #include <linux/string.h> #include <asm/system.h> -/* forward declarations */ -unsigned int EmulateCPDO(const unsigned int); -unsigned int EmulateCPDT(const unsigned int); -unsigned int EmulateCPRT(const unsigned int); - /* Reset the FPA11 chip. Called to initialize and reset the emulator. */ static void resetFPA11(void) { diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h index 93523ae4b7a1..9677ae8448e8 100644 --- a/arch/arm/nwfpe/fpa11.h +++ b/arch/arm/nwfpe/fpa11.h @@ -95,4 +95,24 @@ extern int8 SetRoundingMode(const unsigned int); extern int8 SetRoundingPrecision(const unsigned int); extern void nwfpe_init_fpa(union fp_state *fp); +extern unsigned int EmulateAll(unsigned int opcode); + +extern unsigned int EmulateCPDT(const unsigned int opcode); +extern unsigned int EmulateCPDO(const unsigned int opcode); +extern unsigned int EmulateCPRT(const unsigned int opcode); + +/* fpa11_cpdt.c */ +extern unsigned int PerformLDF(const unsigned int opcode); +extern unsigned int PerformSTF(const unsigned int opcode); +extern unsigned int PerformLFM(const unsigned int opcode); +extern unsigned int PerformSFM(const unsigned int opcode); + +/* single_cpdo.c */ + +extern unsigned int SingleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); +/* double_cpdo.c */ +extern unsigned int DoubleCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); + #endif diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c index adf8d3000540..7c67023655e4 100644 --- a/arch/arm/nwfpe/fpa11_cprt.c +++ b/arch/arm/nwfpe/fpa11_cprt.c @@ -26,12 +26,11 @@ #include "fpa11.inl" #include "fpmodule.h" #include "fpmodule.inl" +#include "softfloat.h" #ifdef CONFIG_FPE_NWFPE_XP extern flag floatx80_is_nan(floatx80); #endif -extern flag float64_is_nan(float64); -extern flag float32_is_nan(float32); unsigned int PerformFLT(const unsigned int opcode); unsigned int PerformFIX(const unsigned int opcode); diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h index 1777e92a88e6..6528e081c83f 100644 --- a/arch/arm/nwfpe/fpopcode.h +++ b/arch/arm/nwfpe/fpopcode.h @@ -476,4 +476,10 @@ static inline unsigned int getDestinationSize(const unsigned int opcode) return (nRc); } +extern unsigned int checkCondition(const unsigned int opcode, + const unsigned int ccodes); + +extern const float64 float64Constant[]; +extern const float32 float32Constant[]; + #endif diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h index 1c8799b9ee4d..14151700b6b2 100644 --- a/arch/arm/nwfpe/softfloat.h +++ b/arch/arm/nwfpe/softfloat.h @@ -265,4 +265,7 @@ static inline flag float64_lt_nocheck(float64 a, float64 b) return (a != b) && (aSign ^ (a < b)); } +extern flag float32_is_nan( float32 a ); +extern flag float64_is_nan( float64 a ); + #endif diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 2c5cae04a95c..957f551ba5ce 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/cpumask.h> #include <linux/interrupt.h> +#include <linux/module.h> #define IPI_SCHEDULE 1 #define IPI_CALL 2 @@ -28,6 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ volatile int cpu_now_booting = 0; diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index dddbf6b5ed2c..85920fb8d08c 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -681,6 +681,15 @@ ENTRY(debug_trap) bl do_debug_trap bra error_code +ENTRY(ill_trap) + /* void ill_trap(void) */ + SWITCH_TO_KERNEL_STACK + SAVE_ALL + ldi r1, #0 ; error_code ; FIXME + mv r0, sp ; pt_regs + bl do_ill_trap + bra error_code + /* Cache flushing handler */ ENTRY(cache_flushing_handler) diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index a4576ac7e870..8b1f6eb76870 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -275,12 +275,14 @@ static void flush_tlb_all_ipi(void *info) *==========================================================================*/ void smp_flush_tlb_mm(struct mm_struct *mm) { - int cpu_id = smp_processor_id(); + int cpu_id; cpumask_t cpu_mask; - unsigned long *mmc = &mm->context[cpu_id]; + unsigned long *mmc; unsigned long flags; preempt_disable(); + cpu_id = smp_processor_id(); + mmc = &mm->context[cpu_id]; cpu_mask = mm->cpu_vm_mask; cpu_clear(cpu_id, cpu_mask); @@ -343,12 +345,14 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - int cpu_id = smp_processor_id(); + int cpu_id; cpumask_t cpu_mask; - unsigned long *mmc = &mm->context[cpu_id]; + unsigned long *mmc; unsigned long flags; preempt_disable(); + cpu_id = smp_processor_id(); + mmc = &mm->context[cpu_id]; cpu_mask = mm->cpu_vm_mask; cpu_clear(cpu_id, cpu_mask); diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 01922271d17e..5fe8ed6d62dc 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -5,8 +5,6 @@ * Hitoshi Yamamoto */ -/* $Id$ */ - /* * 'traps.c' handles hardware traps and faults after we have saved some * state in 'entry.S'. @@ -35,6 +33,7 @@ asmlinkage void ei_handler(void); asmlinkage void rie_handler(void); asmlinkage void debug_trap(void); asmlinkage void cache_flushing_handler(void); +asmlinkage void ill_trap(void); #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(void); @@ -77,22 +76,22 @@ void set_eit_vector_entries(void) eit_vector[5] = BRA_INSN(default_eit_handler, 5); eit_vector[8] = BRA_INSN(rie_handler, 8); eit_vector[12] = BRA_INSN(alignment_check, 12); - eit_vector[16] = 0xff000000UL; + eit_vector[16] = BRA_INSN(ill_trap, 16); eit_vector[17] = BRA_INSN(debug_trap, 17); eit_vector[18] = BRA_INSN(system_call, 18); - eit_vector[19] = 0xff000000UL; - eit_vector[20] = 0xff000000UL; - eit_vector[21] = 0xff000000UL; - eit_vector[22] = 0xff000000UL; - eit_vector[23] = 0xff000000UL; - eit_vector[24] = 0xff000000UL; - eit_vector[25] = 0xff000000UL; - eit_vector[26] = 0xff000000UL; - eit_vector[27] = 0xff000000UL; + eit_vector[19] = BRA_INSN(ill_trap, 19); + eit_vector[20] = BRA_INSN(ill_trap, 20); + eit_vector[21] = BRA_INSN(ill_trap, 21); + eit_vector[22] = BRA_INSN(ill_trap, 22); + eit_vector[23] = BRA_INSN(ill_trap, 23); + eit_vector[24] = BRA_INSN(ill_trap, 24); + eit_vector[25] = BRA_INSN(ill_trap, 25); + eit_vector[26] = BRA_INSN(ill_trap, 26); + eit_vector[27] = BRA_INSN(ill_trap, 27); eit_vector[28] = BRA_INSN(cache_flushing_handler, 28); - eit_vector[29] = 0xff000000UL; - eit_vector[30] = 0xff000000UL; - eit_vector[31] = 0xff000000UL; + eit_vector[29] = BRA_INSN(ill_trap, 29); + eit_vector[30] = BRA_INSN(ill_trap, 30); + eit_vector[31] = BRA_INSN(ill_trap, 31); eit_vector[32] = BRA_INSN(ei_handler, 32); eit_vector[64] = BRA_INSN(pie_handler, 64); #ifdef CONFIG_MMU @@ -286,7 +285,8 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ DO_ERROR( 1, SIGTRAP, "debug trap", debug_trap) DO_ERROR_INFO(0x20, SIGILL, "reserved instruction ", rie_handler, ILL_ILLOPC, regs->bpc) -DO_ERROR_INFO(0x100, SIGILL, "privilege instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(0x100, SIGILL, "privileged instruction", pie_handler, ILL_PRVOPC, regs->bpc) +DO_ERROR_INFO(-1, SIGILL, "illegal trap", ill_trap, ILL_ILLTRP, regs->bpc) extern int handle_unaligned_access(unsigned long, struct pt_regs *); @@ -329,4 +329,3 @@ asmlinkage void do_alignment_check(struct pt_regs *regs, long error_code) set_fs(oldfs); } } - diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c index 61513d5d97da..b5d42b12de10 100644 --- a/arch/mips/pci/fixup-tb0226.c +++ b/arch/mips/pci/fixup-tb0226.c @@ -1,7 +1,7 @@ /* * fixup-tb0226.c, The TANBAC TB0226 specific PCI fixups. * - * Copyright (C) 2002-2004 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> + * Copyright (C) 2002-2005 Yoichi Yuasa <yuasa@hh.iij4u.or.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/pci.h> +#include <asm/vr41xx/giu.h> #include <asm/vr41xx/tb0226.h> int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) @@ -29,42 +30,42 @@ int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) switch (slot) { case 12: vr41xx_set_irq_trigger(GD82559_1_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); - vr41xx_set_irq_level(GD82559_1_PIN, LEVEL_LOW); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); + vr41xx_set_irq_level(GD82559_1_PIN, IRQ_LEVEL_LOW); irq = GD82559_1_IRQ; break; case 13: vr41xx_set_irq_trigger(GD82559_2_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); - vr41xx_set_irq_level(GD82559_2_PIN, LEVEL_LOW); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); + vr41xx_set_irq_level(GD82559_2_PIN, IRQ_LEVEL_LOW); irq = GD82559_2_IRQ; break; case 14: switch (pin) { case 1: vr41xx_set_irq_trigger(UPD720100_INTA_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTA_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTA_IRQ; break; case 2: vr41xx_set_irq_trigger(UPD720100_INTB_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTB_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTB_IRQ; break; case 3: vr41xx_set_irq_trigger(UPD720100_INTC_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTC_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTC_IRQ; break; default: diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 546e1ea4cafa..6b76cf58d9e0 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -91,7 +91,7 @@ struct cpu_spec cpu_specs[] = { .cpu_features = CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, .cpu_user_features = COMMON_PPC | PPC_FEATURE_601_INSTR | - PPC_FEATURE_UNIFIED_CACHE, + PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB, .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_601 @@ -745,7 +745,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "403GCX", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB, .icache_bsize = 16, .dcache_bsize = 16, }, diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c index b566d982806c..8edee806dae7 100644 --- a/arch/ppc/kernel/dma-mapping.c +++ b/arch/ppc/kernel/dma-mapping.c @@ -401,10 +401,10 @@ EXPORT_SYMBOL(__dma_sync); static inline void __dma_sync_page_highmem(struct page *page, unsigned long offset, size_t size, int direction) { - size_t seg_size = min((size_t)PAGE_SIZE, size) - offset; + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); size_t cur_size = seg_size; unsigned long flags, start, seg_offset = offset; - int nr_segs = PAGE_ALIGN(size + (PAGE_SIZE - offset))/PAGE_SIZE; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; local_irq_save(flags); diff --git a/arch/ppc64/kernel/module.c b/arch/ppc64/kernel/module.c index c683bf88e690..928b8581fcb0 100644 --- a/arch/ppc64/kernel/module.c +++ b/arch/ppc64/kernel/module.c @@ -341,6 +341,19 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(unsigned long *)location = my_r2(sechdrs, me); break; + case R_PPC64_TOC16: + /* Subtact TOC pointer */ + value -= my_r2(sechdrs, me); + if (value + 0x8000 > 0xffff) { + printk("%s: bad TOC16 relocation (%lu)\n", + me->name, value); + return -ENOEXEC; + } + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_DS: /* Subtact TOC pointer */ value -= my_r2(sechdrs, me); diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c index 1f5f141fb7a1..928f8febdb3b 100644 --- a/arch/ppc64/kernel/pSeries_pci.c +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -32,7 +32,7 @@ #include "pci.h" -static int __initdata s7a_workaround = -1; +static int __devinitdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -60,7 +60,7 @@ void pcibios_name_device(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void __init check_s7a(void) +static void __devinit check_s7a(void) { struct device_node *root; char *model; diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 25755252067a..fa8121d53b89 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -115,7 +115,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m) /* find motherboard type */ seq_printf(m, "machine\t\t: "); - np = find_devices("device-tree"); + np = of_find_node_by_path("/"); if (np != NULL) { pp = (char *) get_property(np, "model", NULL); if (pp != NULL) @@ -133,6 +133,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m) } seq_printf(m, "\n"); } + of_node_put(np); } else seq_printf(m, "PowerMac\n"); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 56a39d69e080..5ecefc02896a 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -22,6 +22,7 @@ #include <linux/time.h> #include <linux/timex.h> #include <linux/sched.h> +#include <linux/module.h> #include <asm/atomic.h> #include <asm/processor.h> @@ -39,6 +40,8 @@ struct sh_cpuinfo cpu_data[NR_CPUS]; extern void per_cpu_trap_init(void); cpumask_t cpu_possible_map; +EXPORT_SYMBOL(cpu_possible_map); + cpumask_t cpu_online_map; static atomic_t cpus_booted = ATOMIC_INIT(0); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f7c51b869049..6537445dac0e 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -21,10 +21,6 @@ config GENERIC_ISA_DMA bool default y -config GENERIC_IOMAP - bool - default y - source "init/Kconfig" menu "General machine setup" diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index 8a3aef1e22f5..a69856263009 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -5,7 +5,6 @@ CONFIG_MMU=y CONFIG_UID16=y CONFIG_HIGHMEM=y CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S index 702d349c1e88..6528786840c0 100644 --- a/arch/sparc64/kernel/dtlb_base.S +++ b/arch/sparc64/kernel/dtlb_base.S @@ -53,19 +53,18 @@ * be guaranteed to be 0 ... mmu_context.h does guarantee this * by only using 10 bits in the hwcontext value. */ -#define CREATE_VPTE_OFFSET1(r1, r2) +#define CREATE_VPTE_OFFSET1(r1, r2) nop #define CREATE_VPTE_OFFSET2(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_NOP nop #else #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* DTLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus? from_tl1_trap: @@ -74,18 +73,16 @@ from_tl1_trap: be,pn %xcc, kvmap ! Yep, special processing CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset cmp %g5, 4 ! Last trap level? - be,pn %xcc, longpath ! Yep, cannot risk VPTE miss - nop ! delay slot /* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */ + be,pn %xcc, longpath ! Yep, cannot risk VPTE miss + nop ! delay slot ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE 1: brgez,pn %g5, longpath ! Invalid, branch out nop ! Delay-slot 9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB retry ! Trap return nop - nop - nop /* DTLB ** ICACHE line 3: winfixups+real_faults */ longpath: @@ -106,8 +103,7 @@ longpath: nop nop nop - CREATE_VPTE_NOP + nop #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/dtlb_prot.S b/arch/sparc64/kernel/dtlb_prot.S index d848bb7374bb..e0a920162604 100644 --- a/arch/sparc64/kernel/dtlb_prot.S +++ b/arch/sparc64/kernel/dtlb_prot.S @@ -14,14 +14,14 @@ */ /* PROT ** ICACHE line 1: User DTLB protection trap */ - stxa %g0, [%g1] ASI_DMMU ! Clear SFSR FaultValid bit - membar #Sync ! Synchronize ASI stores - rdpr %pstate, %g5 ! Move into alternate globals + mov TLB_SFSR, %g1 + stxa %g0, [%g1] ASI_DMMU ! Clear FaultValid bit + membar #Sync ! Synchronize stores + rdpr %pstate, %g5 ! Move into alt-globals wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate - rdpr %tl, %g1 ! Need to do a winfixup? + rdpr %tl, %g1 ! Need a winfixup? cmp %g1, 1 ! Trap level >1? - mov TLB_TAG_ACCESS, %g4 ! Prepare reload of vaddr - nop + mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index f1dcdf8f7433..b49dcd4504b0 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -28,19 +28,14 @@ #include <asm/mmu.h> /* This section from from _start to sparc64_boot_end should fit into - * 0x0000.0000.0040.4000 to 0x0000.0000.0040.8000 and will be sharing space - * with bootup_user_stack, which is from 0x0000.0000.0040.4000 to - * 0x0000.0000.0040.6000 and empty_bad_page, which is from - * 0x0000.0000.0040.6000 to 0x0000.0000.0040.8000. + * 0x0000000000404000 to 0x0000000000408000. */ - .text .globl start, _start, stext, _stext _start: start: _stext: stext: -bootup_user_stack: ! 0x0000000000404000 b sparc64_boot flushw /* Flush register file. */ @@ -191,8 +186,9 @@ prom_boot_mapping_phys_low: stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 5 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1: "translate" stx %l5, [%sp + 2047 + 128 + 0x20] ! arg2: prom_mmu_ihandle_cache - srlx %l0, 22, %l3 - sllx %l3, 22, %l3 + /* PAGE align */ + srlx %l0, 13, %l3 + sllx %l3, 13, %l3 stx %l3, [%sp + 2047 + 128 + 0x28] ! arg3: vaddr, our PC stx %g0, [%sp + 2047 + 128 + 0x30] ! res1 stx %g0, [%sp + 2047 + 128 + 0x38] ! res2 @@ -211,6 +207,9 @@ prom_boot_mapping_phys_low: ldx [%sp + 2047 + 128 + 0x48], %l2 ! physaddr high stx %l2, [%l4 + 0x0] ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low + /* 4MB align */ + srlx %l3, 22, %l3 + sllx %l3, 22, %l3 stx %l3, [%l4 + 0x8] /* Leave service as-is, "call-method" */ @@ -388,31 +387,30 @@ tlb_fixup_done: * former does use this code, the latter does not yet due * to some complexities. That should be fixed up at some * point. + * + * There used to be enormous complexity wrt. transferring + * over from the firwmare's trap table to the Linux kernel's. + * For example, there was a chicken & egg problem wrt. building + * the OBP page tables, yet needing to be on the Linux kernel + * trap table (to translate PAGE_OFFSET addresses) in order to + * do that. + * + * We now handle OBP tlb misses differently, via linear lookups + * into the prom_trans[] array. So that specific problem no + * longer exists. Yet, unfortunately there are still some issues + * preventing trampoline.S from using this code... ho hum. */ .globl setup_trap_table setup_trap_table: save %sp, -192, %sp - /* Force interrupts to be disabled. Transferring over to - * the Linux trap table is a very delicate operation. - * Until we are actually on the Linux trap table, we cannot - * get the PAGE_OFFSET linear mappings translated. We need - * that mapping to be setup in order to initialize the firmware - * page tables. - * - * So there is this window of time, from the return from - * prom_set_trap_table() until inherit_prom_mappings_post() - * (in arch/sparc64/mm/init.c) completes, during which no - * firmware address space accesses can be made. - */ + /* Force interrupts to be disabled. */ rdpr %pstate, %o1 andn %o1, PSTATE_IE, %o1 wrpr %o1, 0x0, %pstate wrpr %g0, 15, %pil - /* Ok, now make the final valid firmware call to jump over - * to the Linux trap table. - */ + /* Make the firmware call to jump over to the Linux trap table. */ call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 @@ -536,15 +534,21 @@ setup_tba: /* i0 = is_starfire */ ret restore +sparc64_boot_end: + +#include "systbls.S" +#include "ktlb.S" +#include "etrap.S" +#include "rtrap.S" +#include "winfixup.S" +#include "entry.S" /* - * The following skips make sure the trap table in ttable.S is aligned + * The following skip makes sure the trap table in ttable.S is aligned * on a 32K boundary as required by the v9 specs for TBA register. */ -sparc64_boot_end: - .skip 0x2000 + _start - sparc64_boot_end -bootup_user_stack_end: - .skip 0x2000 +1: + .skip 0x4000 + _start - 1b #ifdef CONFIG_SBUS /* This is just a hack to fool make depend config.h discovering @@ -556,15 +560,6 @@ bootup_user_stack_end: ! 0x0000000000408000 #include "ttable.S" -#include "systbls.S" -#include "ktlb.S" -#include "etrap.S" -#include "rtrap.S" -#include "winfixup.S" -#include "entry.S" - - /* This is just anal retentiveness on my part... */ - .align 16384 .data .align 8 diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S index b5e32dfa4fbc..4951ff8f6877 100644 --- a/arch/sparc64/kernel/itlb_base.S +++ b/arch/sparc64/kernel/itlb_base.S @@ -15,14 +15,12 @@ */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, 10, r2 -#define CREATE_VPTE_OFFSET2(r1, r2) -#define CREATE_VPTE_NOP nop +#define CREATE_VPTE_OFFSET2(r1, r2) nop #else /* PAGE_SHIFT */ #define CREATE_VPTE_OFFSET1(r1, r2) \ srax r1, PAGE_SHIFT, r2 #define CREATE_VPTE_OFFSET2(r1, r2) \ sllx r2, 3, r2 -#define CREATE_VPTE_NOP #endif /* PAGE_SHIFT */ @@ -36,6 +34,7 @@ */ /* ITLB ** ICACHE line 1: Quick user TLB misses */ + mov TLB_SFSR, %g1 ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset @@ -43,41 +42,38 @@ 1: brgez,pn %g5, 3f ! Not valid, branch out sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot andcc %g5, %g4, %g0 ! Executable? + +/* ITLB ** ICACHE line 2: Real faults */ be,pn %xcc, 3f ! Nope, branch. nop ! Delay-slot 2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB retry ! Trap return -3: rdpr %pstate, %g4 ! Move into alternate globals - -/* ITLB ** ICACHE line 2: Real faults */ +3: rdpr %pstate, %g4 ! Move into alt-globals wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate rdpr %tpc, %g5 ! And load faulting VA mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB -sparc64_realfault_common: ! Called by TL0 dtlb_miss too + +/* ITLB ** ICACHE line 3: Finish faults */ +sparc64_realfault_common: ! Called by dtlb_miss stb %g4, [%g6 + TI_FAULT_CODE] stx %g5, [%g6 + TI_FAULT_ADDR] ba,pt %xcc, etrap ! Save state 1: rd %pc, %g7 ! ... - nop - -/* ITLB ** ICACHE line 3: Finish faults + window fixups */ call do_sparc64_fault ! Call fault handler add %sp, PTREGS_OFF, %o0! Compute pt_regs arg ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state nop + +/* ITLB ** ICACHE line 4: Window fixups */ winfix_trampoline: rdpr %tpc, %g3 ! Prepare winfixup TNPC - or %g3, 0x7c, %g3 ! Compute offset to branch + or %g3, 0x7c, %g3 ! Compute branch offset wrpr %g3, %tnpc ! Write it into TNPC done ! Do it to it - -/* ITLB ** ICACHE line 4: Unused... */ nop nop nop nop - CREATE_VPTE_NOP #undef CREATE_VPTE_OFFSET1 #undef CREATE_VPTE_OFFSET2 -#undef CREATE_VPTE_NOP diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 7796b37f478c..d9244d3c9f73 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -58,9 +58,6 @@ vpte_noent: done vpte_insn_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 - /* Behave as if we are at TL0. */ wrpr %g0, 1, %tl rdpr %tpc, %g4 /* Find original faulting iaddr */ @@ -71,58 +68,57 @@ vpte_insn_obp: mov TLB_SFSR, %g1 stxa %g4, [%g1 + %g1] ASI_IMMU - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 - - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 - - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop - - /* TLB load and return from trap. */ + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 + +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE + + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_ITLB_DATA_IN retry -kvmap_do_obp: - sethi %hi(prom_pmd_phys), %g5 - ldx [%g5 + %lo(prom_pmd_phys)], %g5 - - /* Get PMD offset. */ - srlx %g4, 23, %g6 - and %g6, 0x7ff, %g6 - sllx %g6, 2, %g6 - - /* Load PMD, is it valid? */ - lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brz,pn %g5, longpath - sllx %g5, 11, %g5 - - /* Get PTE offset. */ - srlx %g4, 13, %g6 - and %g6, 0x3ff, %g6 - sllx %g6, 3, %g6 - - /* Load PTE. */ - ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5 - brgez,pn %g5, longpath - nop +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry - /* TLB load and return from trap. */ +kvmap_do_obp: + sethi %hi(prom_trans), %g5 + or %g5, %lo(prom_trans), %g5 + srlx %g4, 13, %g4 + sllx %g4, 13, %g4 + +1: ldx [%g5 + 0x00], %g6 ! base + brz,a,pn %g6, longpath ! no more entries, fail + mov TLB_SFSR, %g1 ! and restore %g1 + ldx [%g5 + 0x08], %g1 ! len + add %g6, %g1, %g1 ! end + cmp %g6, %g4 + bgu,pt %xcc, 2f + cmp %g4, %g1 + bgeu,pt %xcc, 2f + ldx [%g5 + 0x10], %g1 ! PTE + + /* TLB load, restore %g1, and return from trap. */ + sub %g4, %g6, %g6 + add %g1, %g6, %g5 + mov TLB_SFSR, %g1 stxa %g5, [%g0] ASI_DTLB_DATA_IN retry +2: ba,pt %xcc, 1b + add %g5, (3 * 8), %g5 ! next entry + /* * On a first level data miss, check whether this is to the OBP range (note * that such accesses can be made by prom, as well as by kernel using diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 425c60cfea19..a11910be1013 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu) /* Ensure completion of previous PIO writes. */ (void) pci_iommu_read(iommu->write_complete_reg); - - /* Now update everyone's flush point. */ - for (entry = 0; entry < PBM_NCLUSTERS; entry++) { - iommu->alloc_info[entry].flush = - iommu->alloc_info[entry].next; - } } #define IOPTE_CONSISTENT(CTX) \ @@ -80,120 +74,117 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) +/* Based largely upon the ppc64 iommu allocator. */ +static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages) { - int i; - - tsbsize /= sizeof(iopte_t); - - for (i = 0; i < tsbsize; i++) - iopte_make_dummy(iommu, &iommu->page_table[i]); -} - -static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) -{ - iopte_t *iopte, *limit, *first; - unsigned long cnum, ent, flush_point; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - iopte = (iommu->page_table + - (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - - if (cnum == 0) - limit = (iommu->page_table + - iommu->lowest_consistent_map); - else - limit = (iopte + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - - iopte += ((ent = iommu->alloc_info[cnum].next) << cnum); - flush_point = iommu->alloc_info[cnum].flush; - - first = iopte; - for (;;) { - if (IOPTE_IS_DUMMY(iommu, iopte)) { - if ((iopte + (1 << cnum)) >= limit) - ent = 0; - else - ent = ent + 1; - iommu->alloc_info[cnum].next = ent; - if (ent == flush_point) - __iommu_flushall(iommu); - break; + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + __iommu_flushall(iommu); + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; } - iopte += (1 << cnum); - ent++; - if (iopte >= limit) { - iopte = (iommu->page_table + - (cnum << - (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - ent = 0; + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; } - if (ent == flush_point) - __iommu_flushall(iommu); - if (iopte == first) - goto bad; } - /* I've got your streaming cluster right here buddy boy... */ - return iopte; + for (i = n; i < end; i++) + __set_bit(i, arena->map); -bad: - printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n", - npages); - return NULL; + arena->hint = end; + + return n; } -static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base, - unsigned long npages, unsigned long ctx) +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) { - unsigned long cnum, ent; + unsigned long i; - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} - ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits)) - >> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits); +void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) +{ + unsigned long i, tsbbase, order, sz, num_tsb_entries; + + num_tsb_entries = tsbsize / sizeof(iopte_t); + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_addr_mask; + + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); + prom_halt(); + } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; - /* If the global flush might not have caught this entry, - * adjust the flush point such that we will flush before - * ever trying to reuse it. + /* Allocate and initialize the dummy page which we + * set inactive IO PTEs to point to. */ -#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) - if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) - iommu->alloc_info[cnum].flush = ent; -#undef between + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + + /* Now allocate and setup the IOMMU page table itself. */ + order = get_order(tsbsize); + tsbbase = __get_free_pages(GFP_KERNEL, order); + if (!tsbbase) { + prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n"); + prom_halt(); + } + iommu->page_table = (iopte_t *)tsbbase; + + for (i = 0; i < num_tsb_entries; i++) + iopte_make_dummy(iommu, &iommu->page_table[i]); } -/* We allocate consistent mappings from the end of cluster zero. */ -static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages) +static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages) { - iopte_t *iopte; + long entry; - iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); - while (iopte > iommu->page_table) { - iopte--; - if (IOPTE_IS_DUMMY(iommu, iopte)) { - unsigned long tmp = npages; + entry = pci_arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; - while (--tmp) { - iopte--; - if (!IOPTE_IS_DUMMY(iommu, iopte)) - break; - } - if (tmp == 0) { - u32 entry = (iopte - iommu->page_table); + return iommu->page_table + entry; +} - if (entry < iommu->lowest_consistent_map) - iommu->lowest_consistent_map = entry; - return iopte; - } - } - } - return NULL; +static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages) +{ + pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages); } static int iommu_alloc_ctx(struct pci_iommu *iommu) @@ -233,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, first_page, ctx; + unsigned long flags, order, first_page; void *ret; int npages; @@ -251,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad iommu = pcp->pbm->iommu; spin_lock_irqsave(&iommu->lock, flags); - iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); - if (iopte == NULL) { - spin_unlock_irqrestore(&iommu->lock, flags); + iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } @@ -262,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { - iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | + iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) | IOPTE_WRITE | (first_page & IOPTE_PAGE)); iopte++; first_page += IO_PAGE_SIZE; } - { - int i; - u32 daddr = *dma_addrp; - - npages = size >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pci_iommu_write(iommu->iommu_flush, daddr); - daddr += IO_PAGE_SIZE; - } - } - - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; } @@ -296,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, npages, i, ctx; + unsigned long flags, order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; pcp = pdev->sysdata; @@ -306,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ spin_lock_irqsave(&iommu->lock, flags); - if ((iopte - iommu->page_table) == - iommu->lowest_consistent_map) { - iopte_t *walk = iopte + npages; - iopte_t *limit; - - limit = (iommu->page_table + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - while (walk < limit) { - if (!IOPTE_IS_DUMMY(iommu, walk)) - break; - walk++; - } - iommu->lowest_consistent_map = - (walk - iommu->page_table); - } - - /* Data for consistent mappings cannot enter the streaming - * buffers, so we only need to update the TSB. We flush - * the IOMMU here as well to prevent conflicts with the - * streaming mapping deferred tlb flush scheme. - */ - - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; - - for (i = 0; i < npages; i++, iopte++) - iopte_make_dummy(iommu, iopte); - - if (iommu->iommu_ctxflush) { - pci_iommu_write(iommu->iommu_ctxflush, ctx); - } else { - for (i = 0; i < npages; i++) { - u32 daddr = dvma + (i << IO_PAGE_SHIFT); - - pci_iommu_write(iommu->iommu_flush, daddr); - } - } - - iommu_free_ctx(iommu, ctx); + free_npages(iommu, dvma, npages); spin_unlock_irqrestore(&iommu->lock, flags); @@ -372,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; oaddr = (unsigned long)ptr; npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); - if (base == NULL) + if (unlikely(!base)) goto bad; + bus_addr = (iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -401,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) iopte_val(*base) = iopte_protection | base_paddr; - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; bad: - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); return PCI_DMA_ERROR_CODE; } @@ -481,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, ctx; + unsigned long flags, npages, ctx, i; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -510,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, + npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); @@ -621,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int pci_map_single(pdev, (page_address(sglist->page) + sglist->offset), sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; sglist->dma_length = sglist->length; return 1; } @@ -629,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; /* Step 1: Prepare scatter list. */ npages = prepare_sg(sglist, nelems); - /* Step 2: Allocate a cluster. */ + /* Step 2: Allocate a cluster and context, if necessary. */ spin_lock_irqsave(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); + if (base == NULL) goto bad; - dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT); + + dma_base = iommu->page_table_map_base + + ((base - iommu->page_table) << IO_PAGE_SHIFT); /* Step 3: Normalize DMA addresses. */ used = nelems; @@ -656,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int } used = nelems - used; - /* Step 4: Choose a context if necessary. */ - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); - - /* Step 5: Create the mappings. */ + /* Step 4: Create the mappings. */ if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, nelems, iopte_protection); + + fill_sg(base, sglist, used, nelems, iopte_protection); + #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif - spin_unlock_irqrestore(&iommu->lock, flags); - return used; bad: - spin_unlock_irqrestore(&iommu->lock, flags); - return PCI_DMA_ERROR_CODE; + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); + return 0; } /* Unmap a set of streaming mode DMA translations. */ @@ -692,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, unsigned long flags, ctx, i, npages; u32 bus_addr; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -705,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (sglist[i].dma_length == 0) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); @@ -726,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (strbuf->strbuf_enabled) pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out the TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 6ed1ef25e0ac..c03ed5f49d31 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p) static void psycho_iommu_init(struct pci_controller_info *p) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE; @@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE)); - if (!tsbbase) { - prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_sz_bits = 17; - iommu->page_table_map_base = 0xc0000000; - iommu->dma_addr_mask = 0xffffffff; - pci_iommu_table_init(iommu, IO_TSB_SIZE); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase)); + psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL); control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); @@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p) psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control); /* If necessary, hook us up for starfire IRQ translations. */ - if(this_is_starfire) + if (this_is_starfire) p->starfire_cookie = starfire_hookup(p->pbm_A.portid); else p->starfire_cookie = NULL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 0ee6bd5b9ac6..da8e1364194f 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, u32 dma_mask) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i, order; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE; @@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p, /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8)); - if (!tsbbase) { - prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = dvma_offset; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase)); + sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL); control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ); @@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, switch(tsbsize) { case 64: control |= SABRE_IOMMU_TSBSZ_64K; - iommu->page_table_sz_bits = 16; break; case 128: control |= SABRE_IOMMU_TSBSZ_128K; - iommu->page_table_sz_bits = 17; break; default: prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); @@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p, break; } sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } } static void pbm_register_toplevel_resources(struct pci_controller_info *p, diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index cae5b61fe2f0..d8c4e0919b4e 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm) static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) { struct pci_iommu *iommu = pbm->iommu; - unsigned long tsbbase, i, tagbase, database, order; + unsigned long i, tagbase, database; u32 vdma[2], dma_mask; u64 control; int err, tsbsize; @@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) prom_halt(); }; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses, SCHIZO has iommu ctx flushing. */ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; @@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - order = get_order(tsbsize * 8 * 1024); - tsbbase = __get_free_pages(GFP_KERNEL, order); - if (!tsbbase) { - prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name); - prom_halt(); - } - - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = vdma[0]; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - switch (tsbsize) { - case 64: - iommu->page_table_sz_bits = 16; - break; - - case 128: - iommu->page_table_sz_bits = 17; - break; - - default: - prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); - prom_halt(); - break; - }; - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - schizo_write(iommu->iommu_tsbbase, __pa(tsbbase)); + schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table)); control = schizo_read(iommu->iommu_control); control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 590df5a16f5a..b137fd63f5e1 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1001,13 +1001,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -extern unsigned long xcall_promstop; - -void smp_promstop_others(void) -{ - smp_cross_call(&xcall_promstop, 0, 0, 0); -} - #define prof_multiplier(__cpu) cpu_data(__cpu).multiplier #define prof_counter(__cpu) cpu_data(__cpu).counter diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 0d2e967c7200..1e44ee26cee8 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -105,7 +105,7 @@ static void __init read_obp_memory(const char *property, regs[i].phys_addr = base; regs[i].reg_size = size; } - sort(regs, ents, sizeof(struct linux_prom64_registers), + sort(regs, ents, sizeof(struct linux_prom64_registers), cmp_p64, NULL); } @@ -367,8 +367,11 @@ struct linux_prom_translation { unsigned long size; unsigned long data; }; -static struct linux_prom_translation prom_trans[512] __initdata; -static unsigned int prom_trans_ents __initdata; + +/* Exported for kernel TLB miss handling in ktlb.S */ +struct linux_prom_translation prom_trans[512] __read_mostly; +unsigned int prom_trans_ents __read_mostly; +unsigned int swapper_pgd_zero __read_mostly; extern unsigned long prom_boot_page; extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle); @@ -378,122 +381,57 @@ extern void register_prom_callbacks(void); /* Exported for SMP bootup purposes. */ unsigned long kern_locked_tte_data; -/* Exported for kernel TLB miss handling in ktlb.S */ -unsigned long prom_pmd_phys __read_mostly; -unsigned int swapper_pgd_zero __read_mostly; - -static pmd_t *prompmd __read_mostly; - -#define BASE_PAGE_SIZE 8192 - /* * Translate PROM's mapping we capture at boot time into physical address. * The second parameter is only set from prom_callback() invocations. */ unsigned long prom_virt_to_phys(unsigned long promva, int *error) { - pmd_t *pmdp = prompmd + ((promva >> 23) & 0x7ff); - pte_t *ptep; - unsigned long base; - - if (pmd_none(*pmdp)) { - if (error) - *error = 1; - return 0; - } - ptep = (pte_t *)__pmd_page(*pmdp) + ((promva >> 13) & 0x3ff); - if (!pte_present(*ptep)) { - if (error) - *error = 1; - return 0; - } - if (error) { - *error = 0; - return pte_val(*ptep); - } - base = pte_val(*ptep) & _PAGE_PADDR; - - return base + (promva & (BASE_PAGE_SIZE - 1)); -} + int i; -/* The obp translations are saved based on 8k pagesize, since obp can - * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> - * HI_OBP_ADDRESS range are handled in entry.S and do not use the vpte - * scheme (also, see rant in inherit_locked_prom_mappings()). - */ -static void __init build_obp_range(unsigned long start, unsigned long end, unsigned long data) -{ - unsigned long vaddr; + for (i = 0; i < prom_trans_ents; i++) { + struct linux_prom_translation *p = &prom_trans[i]; - for (vaddr = start; vaddr < end; vaddr += BASE_PAGE_SIZE) { - unsigned long val; - pmd_t *pmd; - pte_t *pte; + if (promva >= p->virt && + promva < (p->virt + p->size)) { + unsigned long base = p->data & _PAGE_PADDR; - pmd = prompmd + ((vaddr >> 23) & 0x7ff); - if (pmd_none(*pmd)) { - pte = __alloc_bootmem(BASE_PAGE_SIZE, BASE_PAGE_SIZE, - PAGE_SIZE); - if (!pte) - prom_halt(); - memset(pte, 0, BASE_PAGE_SIZE); - pmd_set(pmd, pte); + if (error) + *error = 0; + return base + (promva & (8192 - 1)); } - pte = (pte_t *) __pmd_page(*pmd) + ((vaddr >> 13) & 0x3ff); - - val = data; - - /* Clear diag TTE bits. */ - if (tlb_type == spitfire) - val &= ~0x0003fe0000000000UL; - - set_pte_at(&init_mm, vaddr, pte, - __pte(val | _PAGE_MODIFIED)); - - data += BASE_PAGE_SIZE; } + if (error) + *error = 1; + return 0UL; } +/* The obp translations are saved based on 8k pagesize, since obp can + * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> + * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte + * scheme (also, see rant in inherit_locked_prom_mappings()). + */ static inline int in_obp_range(unsigned long vaddr) { return (vaddr >= LOW_OBP_ADDRESS && vaddr < HI_OBP_ADDRESS); } -#define OBP_PMD_SIZE 2048 -static void __init build_obp_pgtable(void) +static int cmp_ptrans(const void *a, const void *b) { - unsigned long i; - - prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, PAGE_SIZE); - if (!prompmd) - prom_halt(); - - memset(prompmd, 0, OBP_PMD_SIZE); - - prom_pmd_phys = __pa(prompmd); - - for (i = 0; i < prom_trans_ents; i++) { - unsigned long start, end; - - if (!in_obp_range(prom_trans[i].virt)) - continue; + const struct linux_prom_translation *x = a, *y = b; - start = prom_trans[i].virt; - end = start + prom_trans[i].size; - if (end > HI_OBP_ADDRESS) - end = HI_OBP_ADDRESS; - - build_obp_range(start, end, prom_trans[i].data); - } + if (x->virt > y->virt) + return 1; + if (x->virt < y->virt) + return -1; + return 0; } -/* Read OBP translations property into 'prom_trans[]'. - * Return the number of entries. - */ +/* Read OBP translations property into 'prom_trans[]'. */ static void __init read_obp_translations(void) { - int n, node; + int n, node, ents, first, last, i; node = prom_finddevice("/virtual-memory"); n = prom_getproplen(node, "translations"); @@ -515,7 +453,41 @@ static void __init read_obp_translations(void) n = n / sizeof(struct linux_prom_translation); - prom_trans_ents = n; + ents = n; + + sort(prom_trans, ents, sizeof(struct linux_prom_translation), + cmp_ptrans, NULL); + + /* Now kick out all the non-OBP entries. */ + for (i = 0; i < ents; i++) { + if (in_obp_range(prom_trans[i].virt)) + break; + } + first = i; + for (; i < ents; i++) { + if (!in_obp_range(prom_trans[i].virt)) + break; + } + last = i; + + for (i = 0; i < (last - first); i++) { + struct linux_prom_translation *src = &prom_trans[i + first]; + struct linux_prom_translation *dest = &prom_trans[i]; + + *dest = *src; + } + for (; i < ents; i++) { + struct linux_prom_translation *dest = &prom_trans[i]; + dest->virt = dest->size = dest->data = 0x0UL; + } + + prom_trans_ents = last - first; + + if (tlb_type == spitfire) { + /* Clear diag TTE bits. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data &= ~0x0003fe0000000000UL; + } } static void __init remap_kernel(void) @@ -553,21 +525,18 @@ static void __init remap_kernel(void) } -static void __init inherit_prom_mappings_pre(void) +static void __init inherit_prom_mappings(void) { read_obp_translations(); /* Now fixup OBP's idea about where we really are mapped. */ prom_printf("Remapping the kernel... "); remap_kernel(); - prom_printf("done.\n"); -} -static void __init inherit_prom_mappings_post(void) -{ - build_obp_pgtable(); + prom_printf("Registering callbacks... "); register_prom_callbacks(); + prom_printf("done.\n"); } /* The OBP specifications for sun4u mark 0xfffffffc00000000 and @@ -1519,7 +1488,7 @@ void __init paging_init(void) swapper_pgd_zero = pgd_val(swapper_pg_dir[0]); - inherit_prom_mappings_pre(); + inherit_prom_mappings(); /* Ok, we can use our TLB miss and window trap handlers safely. * We need to do a quick peek here to see if we are on StarFire @@ -1530,23 +1499,15 @@ void __init paging_init(void) extern void setup_tba(int); setup_tba(this_is_starfire); } - __flush_tlb_all(); - /* Everything from this point forward, until we are done with - * inherit_prom_mappings_post(), must complete successfully - * without calling into the firmware. The firwmare page tables - * have not been built, but we are running on the Linux kernel's - * trap table. - */ + inherit_locked_prom_mappings(1); + + __flush_tlb_all(); /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail); - inherit_prom_mappings_post(); - - inherit_locked_prom_mappings(1); - #ifdef CONFIG_DEBUG_PAGEALLOC kernel_physical_mapping_init(); #endif diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 058b8126c1a7..e4c9151fa116 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -453,22 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address nop nop - .globl xcall_promstop -xcall_promstop: - rdpr %pstate, %g2 - wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - flushw - call prom_stopself - nop - /* We should not return, just spin if we do... */ -1: b,a,pt %xcc, 1b - nop - .data errata32_hwbug: diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 9b895faf077b..87f5cfce23bb 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -68,19 +68,11 @@ void prom_cmdline(void) local_irq_restore(flags); } -#ifdef CONFIG_SMP -extern void smp_promstop_others(void); -#endif - /* Drop into the prom, but completely terminate the program. * No chance of continuing. */ void prom_halt(void) { -#ifdef CONFIG_SMP - smp_promstop_others(); - udelay(8000); -#endif again: p1275_cmd("exit", P1275_INOUT(0, 0)); goto again; /* PROM is out to get me -DaveM */ @@ -88,10 +80,6 @@ again: void prom_halt_power_off(void) { -#ifdef CONFIG_SMP - smp_promstop_others(); - udelay(8000); -#endif p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); /* if nothing else helps, we just halt */ diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 783e18cae090..de17d4c6e02d 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o +ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index e77a38da4350..f73134333f64 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -35,7 +35,6 @@ #include "linux/blkpg.h" #include "linux/genhd.h" #include "linux/spinlock.h" -#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -54,21 +53,20 @@ #include "mem.h" #include "mem_kern.h" #include "cow.h" -#include "aio.h" enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { - enum aio_type op; + enum ubd_req op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - int bitmap_offset; - long bitmap_start; - long bitmap_end; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; int error; }; @@ -82,31 +80,28 @@ extern int create_cow_file(char *cow_file, char *backing_file, unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req, struct request *r, - unsigned long *bitmap); +extern void do_io(struct io_thread_req *req); -static inline int ubd_test_bit(__u64 bit, void *data) +static inline int ubd_test_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - return((buffer[n] & (1 << off)) != 0); + return((data[n] & (1 << off)) != 0); } -static inline void ubd_set_bit(__u64 bit, void *data) +static inline void ubd_set_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - buffer[n] |= (1 << off); + data[n] |= (1 << off); } /*End stuff from ubd_user.h*/ @@ -115,6 +110,8 @@ static inline void ubd_set_bit(__u64 bit, void *data) static DEFINE_SPINLOCK(ubd_io_lock); static DEFINE_SPINLOCK(ubd_lock); +static void (*do_ubd)(void); + static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, @@ -161,8 +158,6 @@ struct cow { int data_offset; }; -#define MAX_SG 64 - struct ubd { char *file; int count; @@ -173,7 +168,6 @@ struct ubd { int no_cow; struct cow cow; struct platform_device pdev; - struct scatterlist sg[MAX_SG]; }; #define DEFAULT_COW { \ @@ -466,114 +460,81 @@ __uml_help(fakehd, ); static void do_ubd_request(request_queue_t * q); -static int in_ubd; + +/* Only changed by ubd_init, which is an initcall. */ +int thread_fd = -1; /* Changed by ubd_handler, which is serialized because interrupts only * happen on CPU 0. */ int intr_count = 0; -static void ubd_end_request(struct request *req, int bytes, int uptodate) +/* call ubd_finish if you need to serialize */ +static void __ubd_finish(struct request *req, int error) { - if (!end_that_request_first(req, uptodate, bytes >> 9)) { - add_disk_randomness(req->rq_disk); - end_that_request_last(req); + int nsect; + + if(error){ + end_request(req, 0); + return; } + nsect = req->current_nr_sectors; + req->sector += nsect; + req->buffer += nsect << 9; + req->errors = 0; + req->nr_sectors -= nsect; + req->current_nr_sectors = 0; + end_request(req, 1); } -/* call ubd_finish if you need to serialize */ -static void __ubd_finish(struct request *req, int bytes) +static inline void ubd_finish(struct request *req, int error) { - if(bytes < 0){ - ubd_end_request(req, 0, 0); - return; - } - - ubd_end_request(req, bytes, 1); + spin_lock(&ubd_io_lock); + __ubd_finish(req, error); + spin_unlock(&ubd_io_lock); } -static inline void ubd_finish(struct request *req, int bytes) +/* Called without ubd_io_lock held */ +static void ubd_handler(void) { - spin_lock(&ubd_io_lock); - __ubd_finish(req, bytes); - spin_unlock(&ubd_io_lock); + struct io_thread_req req; + struct request *rq = elv_next_request(ubd_queue); + int n; + + do_ubd = NULL; + intr_count++; + n = os_read_file(thread_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " + "err = %d\n", os_getpid(), -n); + spin_lock(&ubd_io_lock); + end_request(rq, 0); + spin_unlock(&ubd_io_lock); + return; + } + + ubd_finish(rq, req.error); + reactivate_fd(thread_fd, UBD_IRQ); + do_ubd_request(ubd_queue); } -struct bitmap_io { - atomic_t count; - struct aio_context aio; -}; - -struct ubd_aio { - struct aio_context aio; - struct request *req; - int len; - struct bitmap_io *bitmap; - void *bitmap_buf; -}; - -static int ubd_reply_fd = -1; - static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { - struct aio_thread_reply reply; - struct ubd_aio *aio; - struct request *req; - int err, n, fd = (int) (long) dev; - - while(1){ - err = os_read_file(fd, &reply, sizeof(reply)); - if(err == -EAGAIN) - break; - if(err < 0){ - printk("ubd_aio_handler - read returned err %d\n", - -err); - break; - } - - aio = container_of(reply.data, struct ubd_aio, aio); - n = reply.err; - - if(n == 0){ - req = aio->req; - req->nr_sectors -= aio->len >> 9; - - if((aio->bitmap != NULL) && - (atomic_dec_and_test(&aio->bitmap->count))){ - aio->aio = aio->bitmap->aio; - aio->len = 0; - kfree(aio->bitmap); - aio->bitmap = NULL; - submit_aio(&aio->aio); - } - else { - if((req->nr_sectors == 0) && - (aio->bitmap == NULL)){ - int len = req->hard_nr_sectors << 9; - ubd_finish(req, len); - } - - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - else if(n < 0){ - ubd_finish(aio->req, n); - if(aio->bitmap != NULL) - kfree(aio->bitmap); - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - reactivate_fd(fd, UBD_IRQ); + ubd_handler(); + return(IRQ_HANDLED); +} - do_ubd_request(ubd_queue); +/* Only changed by ubd_init, which is an initcall. */ +static int io_pid = -1; - return(IRQ_HANDLED); +void kill_io_thread(void) +{ + if(io_pid != -1) + os_kill_process(io_pid, 1); } +__uml_exitcall(kill_io_thread); + static int ubd_file_size(struct ubd *dev, __u64 *size_out) { char *file; @@ -608,7 +569,7 @@ static int ubd_open_dev(struct ubd *dev) &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, @@ -870,10 +831,6 @@ int ubd_init(void) { int i; - ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr); - if(ubd_reply_fd < 0) - printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd); - devfs_mk_dir("ubd"); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -884,7 +841,6 @@ int ubd_init(void) return -1; } - blk_queue_max_hw_segments(ubd_queue, MAX_SG); if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; @@ -896,12 +852,40 @@ int ubd_init(void) driver_register(&ubd_driver); for (i = 0; i < MAX_DEV; i++) ubd_add(i); - return 0; } late_initcall(ubd_init); +int ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. */ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return(0); + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + SA_INTERRUPT, "ubd", ubd_dev); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return(err); +} + +device_initcall(ubd_driver_init); + static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -939,55 +923,105 @@ static int ubd_release(struct inode * inode, struct file * file) return(0); } -static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) { - __u64 sector = req->offset / req->sectorsize; - int i; + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; - for(i = 0; i < req->length / req->sectorsize; i++){ - if(ubd_test_bit(sector + i, bitmap)) - continue; + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; - if(req->bitmap_start == -1) - req->bitmap_start = sector + i; - req->bitmap_end = sector + i + 1; + *cow_offset = sector / (sizeof(unsigned long) * 8); - ubd_set_bit(sector + i, bitmap); - } + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = req->offset >> 9; + int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); + } + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); } /* Called with ubd_io_lock held */ -static int prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; + __u64 offset; + int len; + + if(req->rq_status == RQ_INACTIVE) return(1); /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); - ubd_end_request(req, 0, 0); + end_request(req, 0); return(1); } + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; + io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; + io_req->sector_mask = 0; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; + io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->bitmap_offset = dev->cow.bitmap_offset; - io_req->bitmap_start = -1; - io_req->bitmap_end = -1; - if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE)) - cowify_bitmap(io_req, dev->cow.bitmap); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -996,36 +1030,30 @@ static void do_ubd_request(request_queue_t *q) { struct io_thread_req io_req; struct request *req; - __u64 sector; - int err; - - if(in_ubd) - return; - in_ubd = 1; - while((req = elv_next_request(q)) != NULL){ - struct gendisk *disk = req->rq_disk; - struct ubd *dev = disk->private_data; - int n, i; - - blkdev_dequeue_request(req); - - sector = req->sector; - n = blk_rq_map_sg(q, req, dev->sg); - - for(i = 0; i < n; i++){ - struct scatterlist *sg = &dev->sg[i]; - - err = prepare_request(req, &io_req, sector << 9, - sg->offset, sg->length, - sg->page); - if(err) - continue; - - sector += sg->length >> 9; - do_io(&io_req, req, dev->cow.bitmap); + int err, n; + + if(thread_fd == -1){ + while((req = elv_next_request(q)) != NULL){ + err = prepare_request(req, &io_req); + if(!err){ + do_io(&io_req); + __ubd_finish(req, io_req.error); + } + } + } + else { + if(do_ubd || (req = elv_next_request(q)) == NULL) + return; + err = prepare_request(req, &io_req); + if(!err){ + do_ubd = ubd_handler; + n = os_write_file(thread_fd, (char *) &io_req, + sizeof(io_req)); + if(n != sizeof(io_req)) + printk("write to io thread failed, " + "errno = %d\n", -n); } } - in_ubd = 0; } static int ubd_ioctl(struct inode * inode, struct file * file, @@ -1241,95 +1269,131 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, return(err); } -void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap) +static int update_bitmap(struct io_thread_req *req) { - struct ubd_aio *aio; - struct bitmap_io *bitmap_io = NULL; - char *buf; - void *bitmap_buf = NULL; - unsigned long len, sector; - int nsectors, start, end, bit, err; - __u64 off; - - if(req->bitmap_start != -1){ - /* Round up to the nearest word */ - int round = sizeof(unsigned long); - len = (req->bitmap_end - req->bitmap_start + - round * 8 - 1) / (round * 8); - len *= round; - - off = req->bitmap_start / (8 * round); - off *= round; - - bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); - if(bitmap_io == NULL){ - printk("Failed to kmalloc bitmap IO\n"); - req->error = 1; - return; - } + int n; - bitmap_buf = kmalloc(len, GFP_KERNEL); - if(bitmap_buf == NULL){ - printk("do_io : kmalloc of bitmap chunk " - "failed\n"); - kfree(bitmap_io); - req->error = 1; - return; - } - memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); - - *bitmap_io = ((struct bitmap_io) - { .count = ATOMIC_INIT(0), - .aio = INIT_AIO(AIO_WRITE, req->fds[1], - bitmap_buf, len, - req->bitmap_offset + off, - ubd_reply_fd) } ); - } + if(req->cow_offset == -1) + return(0); - nsectors = req->length / req->sectorsize; - start = 0; - end = nsectors; - bit = 0; - do { - if(bitmap != NULL){ - sector = req->offset / req->sectorsize; - bit = ubd_test_bit(sector + start, bitmap); - end = start; - while((end < nsectors) && - (ubd_test_bit(sector + end, bitmap) == bit)) - end++; - } + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } - off = req->offsets[bit] + req->offset + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - aio = kmalloc(sizeof(*aio), GFP_KERNEL); - if(aio == NULL){ - req->error = 1; - return; - } + return(0); +} - *aio = ((struct ubd_aio) - { .aio = INIT_AIO(req->op, req->fds[bit], buf, - len, off, ubd_reply_fd), - .len = len, - .req = r, - .bitmap = bitmap_io, - .bitmap_buf = bitmap_buf }); - - if(aio->bitmap != NULL) - atomic_inc(&aio->bitmap->count); - - err = submit_aio(&aio->aio); - if(err){ - printk("do_io - submit_aio failed, " - "err = %d\n", err); - req->error = 1; - return; - } +void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + int err; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) + &req->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } else { + n = os_write_file(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); - start = end; - } while(start < nsectors); + req->error = update_bitmap(req); } + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread */ +int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req req; + int n; + + ignore_sigwinch_sig(); + while(1){ + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } + continue; + } + io_count++; + do_io(&req); + n = os_write_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); + } +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c new file mode 100644 index 000000000000..b94d2bc4fe06 --- /dev/null +++ b/arch/um/drivers/ubd_user.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <netinet/in.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/param.h> +#include "asm/types.h" +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "ubd_user.h" +#include "os.h" +#include "cow.h" + +#include <endian.h> +#include <byteswap.h> + +void ignore_sigwinch_sig(void) +{ + signal(SIGWINCH, SIG_IGN); +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + *fd_out = fds[1]; + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + err = -errno; + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/aio.h b/arch/um/include/aio.h index 83f16877ab08..423bae9153f8 100644 --- a/arch/um/include/aio.h +++ b/arch/um/include/aio.h @@ -14,27 +14,15 @@ struct aio_thread_reply { }; struct aio_context { - enum aio_type type; - int fd; - void *data; - int len; - unsigned long long offset; int reply_fd; struct aio_context *next; }; -#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \ - aio_reply_fd) \ - { .type = aio_type, \ - .fd = aio_fd, \ - .data = aio_data, \ - .len = aio_len, \ - .offset = aio_offset, \ - .reply_fd = aio_reply_fd } - #define INIT_AIO_CONTEXT { .reply_fd = -1, \ .next = NULL } -extern int submit_aio(struct aio_context *aio); +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); #endif diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 6f766e1faecc..2e58e304b8be 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -6,6 +6,7 @@ #ifndef __OS_H__ #define __OS_H__ +#include "uml-config.h" #include "asm/types.h" #include "../os/include/file.h" @@ -159,7 +160,11 @@ extern int can_do_skas(void); /* Make sure they are clear when running in TT mode. Required by * SEGV_MAYBE_FIXABLE */ +#ifdef UML_CONFIG_MODE_SKAS #define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0) +#else +#define clear_can_do_skas() do {} while (0) +#endif /* mem.c */ extern int create_mem_file(unsigned long len); diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index f6e64026f995..41cfb0944201 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -6,7 +6,6 @@ #include <stdlib.h> #include <unistd.h> #include <signal.h> -#include <string.h> #include <errno.h> #include <sched.h> #include <sys/syscall.h> @@ -17,31 +16,18 @@ #include "user.h" #include "mode.h" +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + static int aio_req_fd_r = -1; static int aio_req_fd_w = -1; -static int update_aio(struct aio_context *aio, int res) -{ - if(res < 0) - aio->len = res; - else if((res == 0) && (aio->type == AIO_READ)){ - /* This is the EOF case - we have hit the end of the file - * and it ends in a partial block, so we fill the end of - * the block with zeros and claim success. - */ - memset(aio->data, 0, aio->len); - aio->len = 0; - } - else if(res > 0){ - aio->len -= res; - aio->data += res; - aio->offset += res; - return aio->len; - } - - return 0; -} - #if defined(HAVE_AIO_ABI) #include <linux/aio_abi.h> @@ -80,7 +66,8 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, * that it now backs the mmapped area. */ -static int do_aio(aio_context_t ctx, struct aio_context *aio) +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) { struct iocb iocb, *iocbp = &iocb; char c; @@ -88,39 +75,40 @@ static int do_aio(aio_context_t ctx, struct aio_context *aio) iocb = ((struct iocb) { .aio_data = (unsigned long) aio, .aio_reqprio = 0, - .aio_fildes = aio->fd, - .aio_buf = (unsigned long) aio->data, - .aio_nbytes = aio->len, - .aio_offset = aio->offset, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset, .aio_reserved1 = 0, .aio_reserved2 = 0, .aio_reserved3 = 0 }); - switch(aio->type){ + switch(type){ case AIO_READ: iocb.aio_lio_opcode = IOCB_CMD_PREAD; + err = io_submit(ctx, 1, &iocbp); break; case AIO_WRITE: iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + err = io_submit(ctx, 1, &iocbp); break; case AIO_MMAP: iocb.aio_lio_opcode = IOCB_CMD_PREAD; iocb.aio_buf = (unsigned long) &c; iocb.aio_nbytes = sizeof(c); + err = io_submit(ctx, 1, &iocbp); break; default: - printk("Bogus op in do_aio - %d\n", aio->type); + printk("Bogus op in do_aio - %d\n", type); err = -EINVAL; - goto out; + break; } - err = io_submit(ctx, 1, &iocbp); if(err > 0) err = 0; else err = -errno; - out: return err; } @@ -129,9 +117,8 @@ static aio_context_t ctx = 0; static int aio_thread(void *arg) { struct aio_thread_reply reply; - struct aio_context *aio; struct io_event event; - int err, n; + int err, n, reply_fd; signal(SIGWINCH, SIG_IGN); @@ -144,22 +131,14 @@ static int aio_thread(void *arg) "errno = %d\n", errno); } else { - /* This is safe as we've just a pointer here. */ - aio = (struct aio_context *) (long) event.data; - if(update_aio(aio, event.res)){ - do_aio(ctx, aio); - continue; - } - reply = ((struct aio_thread_reply) - { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, - sizeof(reply)); + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = os_write_file(reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) - printk("aio_thread - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, - -err); + printk("aio_thread - write failed, fd = %d, " + "err = %d\n", aio_req_fd_r, -err); } } return 0; @@ -167,35 +146,35 @@ static int aio_thread(void *arg) #endif -static int do_not_aio(struct aio_context *aio) +static int do_not_aio(struct aio_thread_req *req) { char c; int err; - switch(aio->type){ + switch(req->type){ case AIO_READ: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, aio->data, aio->len); + err = os_read_file(req->io_fd, req->buf, req->len); break; case AIO_WRITE: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_write_file(aio->fd, aio->data, aio->len); + err = os_write_file(req->io_fd, req->buf, req->len); break; case AIO_MMAP: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, &c, sizeof(c)); + err = os_read_file(req->io_fd, &c, sizeof(c)); break; default: - printk("do_not_aio - bad request type : %d\n", aio->type); + printk("do_not_aio - bad request type : %d\n", req->type); err = -EINVAL; break; } @@ -206,14 +185,14 @@ static int do_not_aio(struct aio_context *aio) static int not_aio_thread(void *arg) { - struct aio_context *aio; + struct aio_thread_req req; struct aio_thread_reply reply; int err; signal(SIGWINCH, SIG_IGN); while(1){ - err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); - if(err != sizeof(aio)){ + err = os_read_file(aio_req_fd_r, &req, sizeof(req)); + if(err != sizeof(req)){ if(err < 0) printk("not_aio_thread - read failed, " "fd = %d, err = %d\n", aio_req_fd_r, @@ -224,34 +203,17 @@ static int not_aio_thread(void *arg) } continue; } - again: - err = do_not_aio(aio); - - if(update_aio(aio, err)) - goto again; - - reply = ((struct aio_thread_reply) { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) printk("not_aio_thread - write failed, fd = %d, " "err = %d\n", aio_req_fd_r, -err); } } -static int submit_aio_24(struct aio_context *aio) -{ - int err; - - err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); - if(err == sizeof(aio)) - err = 0; - - return err; -} - static int aio_pid = -1; -static int (*submit_proc)(struct aio_context *aio); static int init_aio_24(void) { @@ -283,33 +245,11 @@ static int init_aio_24(void) #endif printk("2.6 host AIO support not used - falling back to I/O " "thread\n"); - - submit_proc = submit_aio_24; - return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 -static int submit_aio_26(struct aio_context *aio) -{ - struct aio_thread_reply reply; - int err; - - err = do_aio(ctx, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } - - return err; -} - static int init_aio_26(void) { unsigned long stack; @@ -330,22 +270,39 @@ static int init_aio_26(void) aio_pid = err; printk("Using 2.6 host AIO\n"); + return 0; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; - submit_proc = submit_aio_26; + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } - return 0; + return err; } #else #define DEFAULT_24_AIO 1 -static int submit_aio_26(struct aio_context *aio) +static int init_aio_26(void) { return -ENOSYS; } -static int init_aio_26(void) +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - submit_proc = submit_aio_26; return -ENOSYS; } #endif @@ -412,7 +369,33 @@ static void exit_aio(void) __uml_exitcall(exit_aio); -int submit_aio(struct aio_context *aio) +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - return (*submit_proc)(aio); + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = os_write_file(aio_req_fd_w, &req, sizeof(req)); + if(err == sizeof(req)) + err = 0; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if(aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else { + return submit_aio_26(type, io_fd, buf, len, offset, aio); + } } |