From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 12 May 2008 21:20:42 +0200
Subject: ftrace: trace irq disabled critical timings

This patch adds latency tracing for critical timings
(how long interrupts are disabled for).

 "irqsoff" is added to /debugfs/tracing/available_tracers

Note:
  tracing_max_latency
    also holds the max latency for irqsoff (in usecs).
    (default to large number so one must start latency tracing)

  tracing_thresh
    threshold (in usecs) to always print out if irqs off
    is detected to be longer than stated here.
    If irq_thresh is non-zero, then max_irq_latency
    is ignored.

Here's an example of a trace with ftrace_enabled = 0:

=======
preemption latency trace v1.1.5 on 2.6.24-rc7
--------------------------------------------------------------------
 latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
    -----------------
    | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
    -----------------
 => started at: _spin_lock_irqsave+0x2a/0xb7
 => ended at:   _spin_unlock_irqrestore+0x32/0x5f

                 _------=> CPU#
                / _-----=> irqs-off
               | / _----=> need-resched
               || / _---=> hardirq/softirq
               ||| / _--=> preempt-depth
               |||| /
               |||||     delay
   cmd     pid ||||| time  |   caller
      \   /    |||||   \   |   /
 swapper-0     1d.s3    0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000])
 swapper-0     1d.s3  100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1d.s3  100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f)

vim:ft=help
=======

And this is a trace with ftrace_enabled == 1:

=======
preemption latency trace v1.1.5 on 2.6.24-rc7
--------------------------------------------------------------------
 latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
    -----------------
    | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
    -----------------
 => started at: _spin_lock_irqsave+0x2a/0xb7
 => ended at:   _spin_unlock_irqrestore+0x32/0x5f

                 _------=> CPU#
                / _-----=> irqs-off
               | / _----=> need-resched
               || / _---=> hardirq/softirq
               ||| / _--=> preempt-depth
               |||| /
               |||||     delay
   cmd     pid ||||| time  |   caller
      \   /    |||||   \   |   /
 swapper-0     1dNs3    0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000])
 swapper-0     1dNs3   46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000])
 swapper-0     1dNs3   46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000])
 swapper-0     1dNs3   46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000])
 swapper-0     1dNs3   47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000])
 swapper-0     1dNs3   47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47)
 swapper-0     1dNs3   97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50)
 swapper-0     1dNs3   98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000])
 swapper-0     1dNs3   99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000])
 swapper-0     1dNs3  101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1dNs3  102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000])
 swapper-0     1dNs3  102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f)

vim:ft=help
=======

Signed-off-by: Steven Rostedt
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 arch/x86/lib/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/lib/Makefile')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..84aa2883fe15 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o
 
 lib-y := delay_$(BITS).o
+lib-y += thunk_$(BITS).o
 lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o
--
cgit v1.2.3
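In the traces above, the flags column reads per the legend: in "1dNs3" the task
ran on CPU#1 with irqs off ('d'), need-resched set ('N'), in softirq context
('s'), at preempt depth 3. Driving the new tracer takes a few debugfs writes; a
minimal sketch using the /debugfs path the commit message uses — note the
current_tracer and latency_trace file names are assumptions from the early
ftrace interface, beyond what the message itself lists:

    # select the irqsoff tracer and reset the recorded maximum
    echo irqsoff > /debugfs/tracing/current_tracer
    echo 0 > /debugfs/tracing/tracing_max_latency
    # ... let the workload run, then inspect the worst irqs-off path
    cat /debugfs/tracing/latency_trace
    # or report every section longer than 50 usecs, ignoring the max
    echo 50 > /debugfs/tracing/tracing_thresh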
From f0fbf0abc093ec8bf64506eee4ede9e5daf40ffd Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Thu, 3 Jul 2008 12:35:41 -0300
Subject: x86: integrate delay functions.

delay_32.c and delay_64.c are now equal, and are integrated into delay.c.

Signed-off-by: Glauber Costa
Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/Makefile   |   2 +-
 arch/x86/lib/delay.c    | 137 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/lib/delay_32.c | 138 ------------------------------------------------
 arch/x86/lib/delay_64.c | 128 --------------------------------------------
 4 files changed, 138 insertions(+), 267 deletions(-)
 create mode 100644 arch/x86/lib/delay.c
 delete mode 100644 arch/x86/lib/delay_32.c
 delete mode 100644 arch/x86/lib/delay_64.c

(limited to 'arch/x86/lib/Makefile')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..86960a6c41c0 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o
 
-lib-y := delay_$(BITS).o
+lib-y := delay.o
 lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
new file mode 100644
index 000000000000..f4568605d7d5
--- /dev/null
+++ b/arch/x86/lib/delay.c
@@ -0,0 +1,137 @@
+/*
+ * Precise Delay Loops for i386
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares
+ * Copyright (C) 2008 Jiri Hladky
+ *
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPU's
+ * we have to worry about.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/timex.h>
+#include <linux/preempt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/timer.h>
+
+#ifdef CONFIG_SMP
+# include <asm/smp.h>
+#endif
+
+/* simple loop based delay: */
+static void delay_loop(unsigned long loops)
+{
+        asm volatile(
+                "       test %0,%0      \n"
+                "       jz 3f           \n"
+                "       jmp 1f          \n"
+
+                ".align 16              \n"
+                "1:     jmp 2f          \n"
+
+                ".align 16              \n"
+                "2:     dec %0          \n"
+                "       jnz 2b          \n"
+                "3:     dec %0          \n"
+
+                : /* we don't need output */
+                :"a" (loops)
+        );
+}
+
+/* TSC based delay: */
+static void delay_tsc(unsigned long loops)
+{
+        unsigned long bclock, now;
+        int cpu;
+
+        preempt_disable();
+        cpu = smp_processor_id();
+        rdtscl(bclock);
+        for (;;) {
+                rdtscl(now);
+                if ((now - bclock) >= loops)
+                        break;
+
+                /* Allow RT tasks to run */
+                preempt_enable();
+                rep_nop();
+                preempt_disable();
+
+                /*
+                 * It is possible that we moved to another CPU, and
+                 * since TSC's are per-cpu we need to calculate
+                 * that. The delay must guarantee that we wait "at
+                 * least" the amount of time. Being moved to another
+                 * CPU could make the wait longer but we just need to
+                 * make sure we waited long enough. Rebalance the
+                 * counter for this CPU.
+                 */
+                if (unlikely(cpu != smp_processor_id())) {
+                        loops -= (now - bclock);
+                        cpu = smp_processor_id();
+                        rdtscl(bclock);
+                }
+        }
+        preempt_enable();
+}
+
+/*
+ * Since we calibrate only once at boot, this
+ * function should be set once at boot and not changed
+ */
+static void (*delay_fn)(unsigned long) = delay_loop;
+
+void use_tsc_delay(void)
+{
+        delay_fn = delay_tsc;
+}
+
+int __devinit read_current_timer(unsigned long *timer_val)
+{
+        if (delay_fn == delay_tsc) {
+                rdtscll(*timer_val);
+                return 0;
+        }
+        return -1;
+}
+
+void __delay(unsigned long loops)
+{
+        delay_fn(loops);
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+        int d0;
+
+        xloops *= 4;
+        asm("mull %%edx"
+                :"=d" (xloops), "=&a" (d0)
+                :"1" (xloops), "0"
+                (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
+
+        __delay(++xloops);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+        __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+        __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
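The arithmetic in __const_udelay() deserves a note: 0x10c7 is 2^32/10^6 rounded
up, so usecs * 0x10c7 expresses the delay as a 32.32 fixed-point fraction of a
second, and the mull against loops_per_jiffy * HZ (split as xloops*4 times
lpj*(HZ/4) so the second operand stays within 32 bits) leaves the loop count,
usecs * lpj * HZ / 10^6, in the high half of the product. A user-space sketch
of the same computation, with illustrative HZ and loops_per_jiffy values rather
than any real calibration:

    #include <stdint.h>
    #include <stdio.h>

    /* Made-up values for illustration; the kernel takes loops_per_jiffy
     * from the boot-time delay calibration instead. */
    #define HZ              1000UL
    #define LOOPS_PER_JIFFY 4000000UL

    /* Mirrors __udelay()/__const_udelay(): the delay_loop count for
     * 'usecs' is the high 32 bits of a 32x32 multiply, just like the
     * mull in the kernel code. */
    static uint32_t udelay_loops(uint32_t usecs)
    {
            uint32_t xloops = usecs * 0x10c7;  /* 2^32/10^6, rounded up */

            xloops *= 4;                       /* ... times 4 here ...  */
            return ((uint64_t)xloops *
                    (LOOPS_PER_JIFFY * (HZ / 4))) >> 32; /* /4 there */
    }

    int main(void)
    {
            /* 10us at 4e9 loops/sec -> 40000 loops (the kernel then
             * runs one extra via __delay(++xloops)) */
            printf("%u\n", udelay_loops(10));
            return 0;
    }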
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
deleted file mode 100644
index 0b659a320b1e..000000000000
--- a/arch/x86/lib/delay_32.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Precise Delay Loops for i386
- *
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares
- * Copyright (C) 2008 Jiri Hladky
- *
- * The __delay function must _NOT_ be inlined as its execution time
- * depends wildly on alignment on many x86 processors. The additional
- * jump magic is needed to get the timing stable on all the CPU's
- * we have to worry about.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/timex.h>
-#include <linux/preempt.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-
-#include <asm/processor.h>
-#include <asm/delay.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_SMP
-# include <asm/smp.h>
-#endif
-
-/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
-{
-        __asm__ __volatile__(
-                "       test %0,%0      \n"
-                "       jz 3f           \n"
-                "       jmp 1f          \n"
-
-                ".align 16              \n"
-                "1:     jmp 2f          \n"
-
-                ".align 16              \n"
-                "2:     dec %0          \n"
-                "       jnz 2b          \n"
-                "3:     dec %0          \n"
-
-                : /* we don't need output */
-                :"a" (loops)
-        );
-}
-
-/* TSC based delay: */
-static void delay_tsc(unsigned long loops)
-{
-        unsigned long bclock, now;
-        int cpu;
-
-        preempt_disable();
-        cpu = smp_processor_id();
-        rdtscl(bclock);
-        for (;;) {
-                rdtscl(now);
-                if ((now - bclock) >= loops)
-                        break;
-
-                /* Allow RT tasks to run */
-                preempt_enable();
-                rep_nop();
-                preempt_disable();
-
-                /*
-                 * It is possible that we moved to another CPU, and
-                 * since TSC's are per-cpu we need to calculate
-                 * that. The delay must guarantee that we wait "at
-                 * least" the amount of time. Being moved to another
-                 * CPU could make the wait longer but we just need to
-                 * make sure we waited long enough. Rebalance the
-                 * counter for this CPU.
-                 */
-                if (unlikely(cpu != smp_processor_id())) {
-                        loops -= (now - bclock);
-                        cpu = smp_processor_id();
-                        rdtscl(bclock);
-                }
-        }
-        preempt_enable();
-}
-
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
-{
-        delay_fn = delay_tsc;
-}
-
-int __devinit read_current_timer(unsigned long *timer_val)
-{
-        if (delay_fn == delay_tsc) {
-                rdtscll(*timer_val);
-                return 0;
-        }
-        return -1;
-}
-
-void __delay(unsigned long loops)
-{
-        delay_fn(loops);
-}
-
-inline void __const_udelay(unsigned long xloops)
-{
-        int d0;
-
-        xloops *= 4;
-        __asm__("mull %%edx"
-                :"=d" (xloops), "=&a" (d0)
-                :"1" (xloops), "0"
-                (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
-
-        __delay(++xloops);
-}
-
-void __udelay(unsigned long usecs)
-{
-        __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-
-void __ndelay(unsigned long nsecs)
-{
-        __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-
-EXPORT_SYMBOL(__delay);
-EXPORT_SYMBOL(__const_udelay);
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
deleted file mode 100644
index ff3dfecdb6f9..000000000000
--- a/arch/x86/lib/delay_64.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Precise Delay Loops for x86-64
- *
- * Copyright (C) 1993 Linus Torvalds
- * Copyright (C) 1997 Martin Mares
- *
- * The __delay function must _NOT_ be inlined as its execution time
- * depends wildly on alignment on many x86 processors.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/timex.h>
-#include <linux/preempt.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-
-#include <asm/delay.h>
-#include <asm/msr.h>
-
-#ifdef CONFIG_SMP
-#include <asm/smp.h>
-#endif
-
-/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
-{
-        asm volatile(
-                "       test %0,%0      \n"
-                "       jz 3f           \n"
-                "       jmp 1f          \n"
-
-                ".align 16              \n"
-                "1:     jmp 2f          \n"
-
-                ".align 16              \n"
-                "2:     dec %0          \n"
-                "       jnz 2b          \n"
-                "3:     dec %0          \n"
-
-                : /* we don't need output */
-                :"a" (loops)
-        );
-}
-
-static void delay_tsc(unsigned long loops)
-{
-        unsigned bclock, now;
-        int cpu;
-
-        preempt_disable();
-        cpu = smp_processor_id();
-        rdtscl(bclock);
-        for (;;) {
-                rdtscl(now);
-                if ((now - bclock) >= loops)
-                        break;
-
-                /* Allow RT tasks to run */
-                preempt_enable();
-                rep_nop();
-                preempt_disable();
-
-                /*
-                 * It is possible that we moved to another CPU, and
-                 * since TSC's are per-cpu we need to calculate
-                 * that. The delay must guarantee that we wait "at
-                 * least" the amount of time. Being moved to another
-                 * CPU could make the wait longer but we just need to
-                 * make sure we waited long enough. Rebalance the
-                 * counter for this CPU.
-                 */
-                if (unlikely(cpu != smp_processor_id())) {
-                        loops -= (now - bclock);
-                        cpu = smp_processor_id();
-                        rdtscl(bclock);
-                }
-        }
-        preempt_enable();
-}
-
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
-{
-        delay_fn = delay_tsc;
-}
-
-int __devinit read_current_timer(unsigned long *timer_value)
-{
-        if (delay_fn == delay_tsc) {
-                rdtscll(*timer_value);
-                return 0;
-        }
-        return -1;
-}
-
-void __delay(unsigned long loops)
-{
-        delay_fn(loops);
-}
-EXPORT_SYMBOL(__delay);
-
-inline void __const_udelay(unsigned long xloops)
-{
-        int d0;
-        xloops *= 4;
-        __asm__("mull %%edx"
-                :"=d" (xloops), "=&a" (d0)
-                :"1" (xloops), "0"
-                (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
-
-        __delay(++xloops);
-}
-
-EXPORT_SYMBOL(__const_udelay);
-
-void __udelay(unsigned long usecs)
-{
-        __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
-}
-EXPORT_SYMBOL(__udelay);
-
-void __ndelay(unsigned long nsecs)
-{
-        __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
-}
-EXPORT_SYMBOL(__ndelay);
--
cgit v1.2.3
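Both deleted files and the merged delay.c share the same delay_tsc() wait
pattern: sample the TSC low word, then test the elapsed count (now - bclock)
against the target, rather than comparing now against a precomputed deadline.
With unsigned arithmetic the subtraction stays correct across a wrap of the
32-bit counter. A user-space sketch of just that pattern, with the preemption
and CPU-migration handling omitted; __rdtsc() is the GCC/clang intrinsic, not
a kernel interface:

    #include <stdint.h>
    #include <x86intrin.h>          /* __rdtsc(), GCC/clang on x86 */

    /* Low 32 bits of the TSC, which is what rdtscl() read. */
    static uint32_t read_tsc_lo(void)
    {
            return (uint32_t)__rdtsc();
    }

    /* Busy-wait for 'loops' TSC ticks. 'now - bclock' is an unsigned
     * 32-bit subtraction, so it yields the elapsed tick count modulo
     * 2^32 and the test survives a wrap of the counter's low word,
     * with no 64-bit math in the loop. */
    static void wait_ticks(uint32_t loops)
    {
            uint32_t bclock = read_tsc_lo();

            while ((uint32_t)(read_tsc_lo() - bclock) < loops)
                    ;       /* the kernel adds rep_nop() and allows
                             * preemption at this point */
    }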
From 6c2d458680d49d939ffd4b4cdc84d9e004d65910 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 24 Jun 2008 12:05:11 -0300
Subject: x86: merge getuser asm functions.

getuser_32.S and getuser_64.S are merged into getuser.S.

Signed-off-by: Glauber Costa
Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/Makefile     |   2 +-
 arch/x86/lib/getuser.S    | 104 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/lib/getuser_32.S |  79 -----------------------------------
 arch/x86/lib/getuser_64.S | 100 --------------------------------------------
 include/asm-x86/asm.h     |   4 +-
 5 files changed, 108 insertions(+), 181 deletions(-)
 create mode 100644 arch/x86/lib/getuser.S
 delete mode 100644 arch/x86/lib/getuser_32.S
 delete mode 100644 arch/x86/lib/getuser_64.S

(limited to 'arch/x86/lib/Makefile')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 86960a6c41c0..e92948203a5d 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o
 
 lib-y := delay.o
-lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser.o putuser_$(BITS).o
 lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
new file mode 100644
index 000000000000..ad374003742f
--- /dev/null
+++ b/arch/x86/lib/getuser.S
@@ -0,0 +1,104 @@
+/*
+ * __get_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ * (C) Copyright 2005 Andi Kleen
+ * (C) Copyright 2008 Glauber Costa
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+
+/*
+ * __get_user_X
+ *
+ * Inputs:      %[r|e]ax contains the address.
+ *              The register is modified, but all changes are undone
+ *              before returning because the C code doesn't know about it.
+ *
+ * Outputs:     %[r|e]ax is error code (0 or -EFAULT)
+ *              %[r|e]dx contains zero-extended value
+ *
+ *
+ * These functions should not modify any other registers,
+ *      as they get called from within inline assembly.
+ */
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/page.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+        .text
+ENTRY(__get_user_1)
+        CFI_STARTPROC
+        GET_THREAD_INFO(%_ASM_DX)
+        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+        jae bad_get_user
+1:      movzb (%_ASM_AX),%edx
+        xor %eax,%eax
+        ret
+        CFI_ENDPROC
+ENDPROC(__get_user_1)
+
+ENTRY(__get_user_2)
+        CFI_STARTPROC
+        add $1,%_ASM_AX
+        jc bad_get_user
+        GET_THREAD_INFO(%_ASM_DX)
+        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+        jae bad_get_user
+2:      movzwl -1(%_ASM_AX),%edx
+        xor %eax,%eax
+        ret
+        CFI_ENDPROC
+ENDPROC(__get_user_2)
+
+ENTRY(__get_user_4)
+        CFI_STARTPROC
+        add $3,%_ASM_AX
+        jc bad_get_user
+        GET_THREAD_INFO(%_ASM_DX)
+        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+        jae bad_get_user
+3:      mov -3(%_ASM_AX),%edx
+        xor %eax,%eax
+        ret
+        CFI_ENDPROC
+ENDPROC(__get_user_4)
+
+#ifdef CONFIG_X86_64
+ENTRY(__get_user_8)
+        CFI_STARTPROC
+        add $7,%_ASM_AX
+        jc bad_get_user
+        GET_THREAD_INFO(%_ASM_DX)
+        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
+        jae bad_get_user
+4:      movq -7(%_ASM_AX),%_ASM_DX
+        xor %eax,%eax
+        ret
+        CFI_ENDPROC
+ENDPROC(__get_user_8)
+#endif
+
+bad_get_user:
+        CFI_STARTPROC
+        xor %edx,%edx
+        mov $(-EFAULT),%_ASM_AX
+        ret
+        CFI_ENDPROC
+END(bad_get_user)
+
+.section __ex_table,"a"
+        _ASM_PTR 1b,bad_get_user
+        _ASM_PTR 2b,bad_get_user
+        _ASM_PTR 3b,bad_get_user
+#ifdef CONFIG_X86_64
+        _ASM_PTR 4b,bad_get_user
+#endif
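The pattern each size variant follows is worth spelling out: for an N-byte
access the code adds N-1 to the address, lets the carry flag catch a wrap of
the address space (jc bad_get_user), compares the last byte of the access
against addr_limit, and then addresses the load relative to the adjusted
pointer (-1, -3, -7). In C the check amounts to the following sketch, with
addr_limit standing in for TI_addr_limit:

    /* Sketch of the check __get_user_4 performs before '3: mov -3(%eax)'. */
    static int access_ok_4(unsigned long addr, unsigned long addr_limit)
    {
            unsigned long last = addr + 3;  /* last byte of the access */

            if (last < addr)                /* carry set: address wrapped */
                    return 0;               /* -> jc bad_get_user */
            return last < addr_limit;       /* 'jae bad_get_user' inverted */
    }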
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
deleted file mode 100644
index 2bb0a183e066..000000000000
--- a/arch/x86/lib/getuser_32.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-
-/*
- * __get_user_X
- *
- * Inputs:      %eax contains the address
- *
- * Outputs:     %eax is error code (0 or -EFAULT)
- *              %edx contains zero-extended value
- *
- * These functions should not modify any other registers,
- *      as they get called from within inline assembly.
- */
-
-.text
-ENTRY(__get_user_1)
-        CFI_STARTPROC
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-1:      movzb (%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
-        CFI_STARTPROC
-        add $1,%_ASM_AX
-        jc bad_get_user
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-2:      movzwl -1(%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
-        CFI_STARTPROC
-        add $3,%_ASM_AX
-        jc bad_get_user
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-3:      mov -3(%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-bad_get_user:
-        CFI_STARTPROC
-        xor %edx,%edx
-        mov $-14,%_ASM_AX
-        ret
-        CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
-        _ASM_PTR 1b,bad_get_user
-        _ASM_PTR 2b,bad_get_user
-        _ASM_PTR 3b,bad_get_user
-.previous
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
deleted file mode 100644
index e33388419b7b..000000000000
--- a/arch/x86/lib/getuser_64.S
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * __get_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __get_user_X
- *
- * Inputs:      %rax contains the address.
- *              The register is modified, but all changes are undone
- *              before returning because the C code doesn't know about it.
- *
- * Outputs:     %rax is error code (0 or -EFAULT)
- *              %rdx contains zero-extended value
- *
- *
- * These functions should not modify any other registers,
- *      as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-        .text
-ENTRY(__get_user_1)
-        CFI_STARTPROC
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-1:      movzb (%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_1)
-
-ENTRY(__get_user_2)
-        CFI_STARTPROC
-        add $1,%_ASM_AX
-        jc bad_get_user
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-2:      movzwl -1(%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_2)
-
-ENTRY(__get_user_4)
-        CFI_STARTPROC
-        add $3,%_ASM_AX
-        jc bad_get_user
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-3:      mov -3(%_ASM_AX),%edx
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_4)
-
-ENTRY(__get_user_8)
-        CFI_STARTPROC
-        add $7,%_ASM_AX
-        jc bad_get_user
-        GET_THREAD_INFO(%_ASM_DX)
-        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
-        jae bad_get_user
-4:      movq -7(%_ASM_AX),%_ASM_DX
-        xor %eax,%eax
-        ret
-        CFI_ENDPROC
-ENDPROC(__get_user_8)
-
-bad_get_user:
-        CFI_STARTPROC
-        xor %edx,%edx
-        mov $(-EFAULT),%_ASM_AX
-        ret
-        CFI_ENDPROC
-END(bad_get_user)
-
-.section __ex_table,"a"
-        _ASM_PTR 1b,bad_get_user
-        _ASM_PTR 2b,bad_get_user
-        _ASM_PTR 3b,bad_get_user
-        _ASM_PTR 4b,bad_get_user
-.previous
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 435402e623e1..57750a95685d 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -3,8 +3,10 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x) x
+# define __ASM_EX_SEC  .section __ex_table
 #else
 # define __ASM_FORM(x) " " #x " "
+# define __ASM_EX_SEC  " .section __ex_table,\"a\"\n"
 #endif
 
 #ifdef CONFIG_X86_32
@@ -30,7 +32,7 @@
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
-       " .section __ex_table,\"a\"\n" \
+       __ASM_EX_SEC \
        _ASM_ALIGN "\n" \
        _ASM_PTR #from "," #to "\n" \
        " .previous\n"
--
cgit v1.2.3
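The asm.h change is what lets getuser.S above serve both worlds from one
source: __ASM_EX_SEC gives the __ex_table section directive an assembly
spelling alongside the existing C-string spelling, so _ASM_EXTABLE works
whether it is expanded in a .S file or inside a C inline-asm string. For
reference, this is the classic fixup pattern the macro serves on the C side;
the function below is an illustrative sketch, not kernel code:

    #include <asm/asm.h>            /* _ASM_EXTABLE() */

    /* Load *ptr, but recover from a fault: the exception table entry
     * sends a fault at 1: to the fixup at 3:, which flags the error
     * and resumes at 2:. */
    static int load_with_fixup(const int *ptr, int *val)
    {
            int err = 0;

            asm volatile("1:        movl (%2),%0\n"
                         "2:\n"
                         ".section .fixup,\"ax\"\n"
                         "3:        movl $1,%1\n"
                         "  jmp 2b\n"
                         ".previous\n"
                         _ASM_EXTABLE(1b, 3b)
                         : "=r" (*val), "+r" (err)
                         : "r" (ptr));

            return err;
    }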
From 5cbbc3b1eb37bdc72eefd2de03b39f5e784400c2 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 24 Jun 2008 17:40:14 -0300
Subject: x86: merge putuser asm functions.

putuser_32.S and putuser_64.S are merged into putuser.S.

Signed-off-by: Glauber Costa
Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/Makefile     |  2 +-
 arch/x86/lib/putuser.S    | 97 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/lib/putuser_32.S | 90 ------------------------------------------------
 arch/x86/lib/putuser_64.S | 94 ---------------------------------------------
 4 files changed, 98 insertions(+), 185 deletions(-)
 create mode 100644 arch/x86/lib/putuser.S
 delete mode 100644 arch/x86/lib/putuser_32.S
 delete mode 100644 arch/x86/lib/putuser_64.S

(limited to 'arch/x86/lib/Makefile')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e92948203a5d..83226e0a7ce4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o
 
 lib-y := delay.o
-lib-y += usercopy_$(BITS).o getuser.o putuser_$(BITS).o
+lib-y += usercopy_$(BITS).o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
new file mode 100644
index 000000000000..36b0d15ae6e9
--- /dev/null
+++ b/arch/x86/lib/putuser.S
@@ -0,0 +1,97 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ * (C) Copyright 2005 Andi Kleen
+ * (C) Copyright 2008 Glauber Costa
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+#include <asm/asm.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs:      %eax[:%edx] contains the data
+ *              %ecx contains the address
+ *
+ * Outputs:     %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ *      as they get called from within inline assembly.
+ */
+
+#define ENTER   CFI_STARTPROC ; \
+                GET_THREAD_INFO(%_ASM_BX)
+#define EXIT    ret ; \
+                CFI_ENDPROC
+
+.text
+ENTRY(__put_user_1)
+        ENTER
+        cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
+        jae bad_put_user
+1:      movb %al,(%_ASM_CX)
+        xor %eax,%eax
+        EXIT
+ENDPROC(__put_user_1)
+
+ENTRY(__put_user_2)
+        ENTER
+        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+        sub $1,%_ASM_BX
+        cmp %_ASM_BX,%_ASM_CX
+        jae bad_put_user
+2:      movw %ax,(%_ASM_CX)
+        xor %eax,%eax
+        EXIT
+ENDPROC(__put_user_2)
+
+ENTRY(__put_user_4)
+        ENTER
+        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+        sub $3,%_ASM_BX
+        cmp %_ASM_BX,%_ASM_CX
+        jae bad_put_user
+3:      movl %eax,(%_ASM_CX)
+        xor %eax,%eax
+        EXIT
+ENDPROC(__put_user_4)
+
+ENTRY(__put_user_8)
+        ENTER
+        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
+        sub $7,%_ASM_BX
+        cmp %_ASM_BX,%_ASM_CX
+        jae bad_put_user
+4:      mov %_ASM_AX,(%_ASM_CX)
+#ifdef CONFIG_X86_32
+5:      movl %edx,4(%_ASM_CX)
+#endif
+        xor %eax,%eax
+        EXIT
+ENDPROC(__put_user_8)
+
+bad_put_user:
+        CFI_STARTPROC
+        movl $-EFAULT,%eax
+        EXIT
+END(bad_put_user)
+
+.section __ex_table,"a"
+        _ASM_PTR 1b,bad_put_user
+        _ASM_PTR 2b,bad_put_user
+        _ASM_PTR 3b,bad_put_user
+        _ASM_PTR 4b,bad_put_user
+#ifdef CONFIG_X86_32
+        _ASM_PTR 5b,bad_put_user
+#endif
.previous
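The calling convention documented in the header (data in %eax[:%edx], address
in %ecx, error back in %eax, with %ebx clobbered by GET_THREAD_INFO) is what
the uaccess macros rely on; a hypothetical 32-bit call site would look roughly
like this sketch, which is not the kernel's actual put_user macro:

    /* Illustrative only: value in %eax, address in %ecx, error code
     * returned in %eax; %ebx is clobbered inside __put_user_4. */
    static int put_user_4(unsigned int value, unsigned int *uptr)
    {
            int err;

            asm volatile("call __put_user_4"
                         : "=a" (err)
                         : "0" (value), "c" (uptr)
                         : "ebx", "cc", "memory");

            return err;
    }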
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S
deleted file mode 100644
index e7eda34feb34..000000000000
--- a/arch/x86/lib/putuser_32.S
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * __put_user functions.
- *
- * (C) Copyright 2005 Linus Torvalds
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-
-/*
- * __put_user_X
- *
- * Inputs:      %eax[:%edx] contains the data
- *              %ecx contains the address
- *
- * Outputs:     %eax is error code (0 or -EFAULT)
- *
- * These functions should not modify any other registers,
- *      as they get called from within inline assembly.
- */
-
-#define ENTER   CFI_STARTPROC ; \
-                GET_THREAD_INFO(%_ASM_BX)
-#define EXIT    ret ; \
-                CFI_ENDPROC
-
-.text
-ENTRY(__put_user_1)
-        ENTER
-        cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
-        jae bad_put_user
-1:      movb %al,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_1)
-
-ENTRY(__put_user_2)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $1,%_ASM_BX
-        cmp %_ASM_BX,%_ASM_CX
-        jae bad_put_user
-2:      movw %ax,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_2)
-
-ENTRY(__put_user_4)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $3,%_ASM_BX
-        cmp %_ASM_BX,%_ASM_CX
-        jae bad_put_user
-3:      movl %eax,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_4)
-
-ENTRY(__put_user_8)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $7,%_ASM_BX
-        cmp %_ASM_BX,%_ASM_CX
-        jae bad_put_user
-4:      movl %_ASM_AX,(%_ASM_CX)
-5:      movl %edx,4(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_8)
-
-bad_put_user:
-        CFI_STARTPROC
-        movl $-14,%eax
-        EXIT
-END(bad_put_user)
-
-.section __ex_table,"a"
-        _ASM_PTR 1b,bad_put_user
-        _ASM_PTR 2b,bad_put_user
-        _ASM_PTR 3b,bad_put_user
-        _ASM_PTR 4b,bad_put_user
-        _ASM_PTR 5b,bad_put_user
-.previous
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
deleted file mode 100644
index d496cc8e7308..000000000000
--- a/arch/x86/lib/putuser_64.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * __put_user functions.
- *
- * (C) Copyright 1998 Linus Torvalds
- * (C) Copyright 2005 Andi Kleen
- *
- * These functions have a non-standard call interface
- * to make them more efficient, especially as they
- * return an error value in addition to the "real"
- * return value.
- */
-
-/*
- * __put_user_X
- *
- * Inputs:      %rcx contains the address
- *              %rdx contains new value
- *
- * Outputs:     %rax is error code (0 or -EFAULT)
- *
- * %rbx is destroyed.
- *
- * These functions should not modify any other registers,
- *      as they get called from within inline assembly.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/page.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/asm.h>
-
-#define ENTER   CFI_STARTPROC ; \
-                GET_THREAD_INFO(%_ASM_BX)
-#define EXIT    ret ; \
-                CFI_ENDPROC
-
-        .text
-ENTRY(__put_user_1)
-        ENTER
-        cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
-        jae bad_put_user
-1:      movb %al,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_1)
-
-ENTRY(__put_user_2)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $1, %_ASM_BX
-        cmp %_ASM_BX ,%_ASM_CX
-        jae bad_put_user
-2:      movw %ax,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_2)
-
-ENTRY(__put_user_4)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $3, %_ASM_BX
-        cmp %_ASM_BX, %_ASM_CX
-        jae bad_put_user
-3:      movl %eax,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_4)
-
-ENTRY(__put_user_8)
-        ENTER
-        mov TI_addr_limit(%_ASM_BX),%_ASM_BX
-        sub $7, %_ASM_BX
-        cmp %_ASM_BX, %_ASM_CX
-        jae bad_put_user
-4:      movq %_ASM_AX,(%_ASM_CX)
-        xor %eax,%eax
-        EXIT
-ENDPROC(__put_user_8)
-
-bad_put_user:
-        CFI_STARTPROC
-        mov $(-EFAULT),%eax
-        EXIT
-END(bad_put_user)
-
-.section __ex_table,"a"
-        _ASM_PTR 1b,bad_put_user
-        _ASM_PTR 2b,bad_put_user
-        _ASM_PTR 3b,bad_put_user
-        _ASM_PTR 4b,bad_put_user
-.previous
--
cgit v1.2.3

From fb481dd56adf3c5b0993b8f052cc9ba966e3959d Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 4 Sep 2008 13:46:11 +0200
Subject: x86: drop -funroll-loops for csum_partial_64.c

Impact: performance optimization

I did some rebenchmarking with modern compilers and dropping
-funroll-loops makes the function consistently go faster by a few
percent. So drop that flag.

Thanks to Richard Guenther for a hint.

Signed-off-by: Andi Kleen
Signed-off-by: H. Peter Anvin
---
 arch/x86/lib/Makefile | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/lib/Makefile')

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa3fa4119424..55e11aa6d66c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y)
         lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
         obj-y += io_64.o iomap_copy_64.o
-
-        CFLAGS_csum-partial_64.o := -funroll-loops
-
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
--
cgit v1.2.3