From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 26 Mar 2008 16:11:31 -0500 Subject: x86: resize NR_IRQS for large machines On machines with very large numbers of cpus, tables that are dimensioned by NR_IRQS get very large, especially the irq_desc table. They are also very sparsely used. When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS to set NR_IRQS, otherwise use NR_CPUS. Signed-off-by: Alan Mayer Reviewed-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce898bf9..cf9f40a91c9c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include #include #include #include #include +#include #include /* -- cgit v1.2.1 From 988f7b5789ccf5cfed14c72e28573a49f0cb4809 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e7..d18b1dd49fab 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v1.2.1 From 1a189b97190d3f0f8cf0379a799d3555b2d648bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 13 Apr 2008 21:41:55 +0100 Subject: [ARM] pxa: Add bare bones PWM API Signed-off-by: Russell King --- include/linux/pwm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/pwm.h (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h new file mode 100644 index 000000000000..3945f803d514 --- /dev/null +++ b/include/linux/pwm.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_PWM_H +#define __LINUX_PWM_H + +struct pwm_device; + +/* + * pwm_request - request a PWM device + */ +struct pwm_device *pwm_request(int pwm_id, const char *label); + +/* + * pwm_free - free a PWM device + */ +void pwm_free(struct pwm_device *pwm); + +/* + * pwm_config - change a PWM device configuration + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); + +/* + * pwm_enable - start a PWM output toggling + */ +int pwm_enable(struct pwm_device *pwm); + +/* + * pwm_disable - stop a PWM output toggling + */ +void pwm_disable(struct pwm_device *pwm); + +#endif /* __ASM_ARCH_PWM_H */ -- cgit v1.2.1 From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4b..717cab8a0c83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.1 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out the scheduler into the header. This way if it were to ever change, other parts of the kernel will know. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8a0c83..6e26f1fdbfe2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif -- cgit v1.2.1 From 502825282e6f79c975a644afc124432ec1744de4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: add preempt_enable/disable notrace macros The tracer may need to call preempt_enable and disable functions for time keeping and such. The trace gets ugly when we see these functions show up for all traces. To make the output cleaner this patch adds preempt_enable_notrace and preempt_disable_notrace to be used by tracer (and debugging) functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/preempt.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 23f0c54175cd..36b03d50bf40 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -52,6 +52,34 @@ do { \ preempt_check_resched(); \ } while (0) +/* For debugging and tracer internals only! */ +#define add_preempt_count_notrace(val) \ + do { preempt_count() += (val); } while (0) +#define sub_preempt_count_notrace(val) \ + do { preempt_count() -= (val); } while (0) +#define inc_preempt_count_notrace() add_preempt_count_notrace(1) +#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) + +#define preempt_disable_notrace() \ +do { \ + inc_preempt_count_notrace(); \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched_notrace() \ +do { \ + barrier(); \ + dec_preempt_count_notrace(); \ +} while (0) + +/* preempt_check_resched is OK to trace */ +#define preempt_enable_notrace() \ +do { \ + preempt_enable_no_resched_notrace(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_disable() do { } while (0) @@ -59,6 +87,10 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +#define preempt_disable_notrace() do { } while (0) +#define preempt_enable_no_resched_notrace() do { } while (0) +#define preempt_enable_notrace() do { } while (0) + #endif #ifdef CONFIG_PREEMPT_NOTIFIERS -- cgit v1.2.1 From ffdc1a09ae7e2cbd714a446ee38a27f625b5f1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: tracing: add notrace to linkage.h notrace signals that a function should not be traced. Most of the time this is used by tracers to annotate code that cannot be traced - it's in a volatile state (such as in user vdso context or NMI context) or it's in the tracer internals. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..14f329c64ba8 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v1.2.1 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value the ftrace routine will be called everytime we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/ftrace.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 000000000000..b96ef14c249a --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); + +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be a static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparely. Never free an ftrace_op or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. + */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ -- cgit v1.2.1 From 352ad25aa4a189c667cb2af333948d34692a2d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: tracer for scheduler wakeup latency This patch adds the tracer that tracks the wakeup latency of the highest priority waking task. "wakeup" is added to /debugfs/tracing/available_tracers Also added to /debugfs/tracing tracing_max_latency holds the current max latency for the wakeup wakeup_thresh if set to other than zero, a log will be recorded for every wakeup that takes longer than the number entered in here (usecs for all counters) (deletes previous trace) Examples: (with ftrace_enabled = 0) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ (with ftrace_enabled = 1) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 -------------------------------------------------------------------- latency: 36 us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / bash-10653 1d..3 0us : wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 (update_rq_clock+0x9/0x20 ) bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 (__update_rq_clock+0xc/0x90 ) bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 (sched_clock+0x9/0x29 ) bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 (activate_task+0xc/0x3f ) bash-10653 1d..2 4us : activate_task+0x2d/0x3f (enqueue_task+0xe/0x66 ) bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 (enqueue_task_rt+0x9/0x3c ) bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 (check_preempt_wakeup+0x12/0x99 ) [...] bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 (_spin_unlock+0x9/0x33 ) bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff (_spin_lock_irqsave+0xc/0xa9 ) bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 (add_preempt_count+0xe/0x77 ) bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ The [...] was added here to not waste your email box space. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b96ef14c249a..db8a5e7abe41 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,10 +5,6 @@ #include -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { @@ -35,4 +31,23 @@ extern void mcount(void); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ + + +#ifdef CONFIG_FRAME_POINTER +/* TODO: need to fix this for ARM */ +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +#else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.1 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). "irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db8a5e7abe41..0a20445dcbcc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -50,4 +50,12 @@ extern void mcount(void); # define CALLER_ADDR5 0UL #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index e600c4e9b8c5..5b711d4e9fd9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,10 +12,10 @@ #define _LINUX_TRACE_IRQFLAGS_H #ifdef CONFIG_TRACE_IRQFLAGS - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); # define trace_hardirq_context(p) ((p)->hardirq_context) # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) @@ -41,6 +41,14 @@ # define INIT_TRACE_IRQFLAGS #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void stop_critical_timings(void); + extern void start_critical_timings(void); +#else +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #include -- cgit v1.2.1 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simpling echoing in the appropriate trace name into /debugfs/tracing/current_tracer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 3 ++- include/linux/preempt.h | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0a20445dcbcc..740c97dcf9cb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,4 +58,12 @@ extern void mcount(void); # define time_hardirqs_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_TRACER + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); +#else +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5b711d4e9fd9..2b1c2e58566e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -41,7 +41,8 @@ # define INIT_TRACE_IRQFLAGS #endif -#ifdef CONFIG_IRQSOFF_TRACER +#if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); extern void start_critical_timings(void); #else diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 36b03d50bf40..72b1a10a59b6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_DEBUG_PREEMPT +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); #else -- cgit v1.2.1 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations of that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMI's so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 740c97dcf9cb..90dbc0ee2046 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -32,6 +32,24 @@ extern void mcount(void); # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_HASHBITS 10 +# define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0ee2046..ccd8537dbdb7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { -- cgit v1.2.1 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ccd8537dbdb7..d509ad6c9cb8 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,19 +42,23 @@ extern void mcount(void); # define FTRACE_HASHBITS 10 # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it. Later, the ftraced daemon will call kstop_machine and patch all the locations to nops. When a ftrace is enabled, the original calls to mcount will now be set top call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race conditions that is associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d509ad6c9cb8..b0dd0093058f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,9 +56,14 @@ struct dyn_ftrace { extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); -extern int ftrace_dyn_arch_init(void); +extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code); +extern int ftrace_update_ftrace_func(ftrace_func_t func); +extern void ftrace_caller(void); +extern void ftrace_call(void); +extern void mcount_call(void); #endif #ifdef CONFIG_FRAME_POINTER -- cgit v1.2.1 From 5072c59fd45e9976d02ee6f18c7336ef97623cbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add filter select functions to trace This patch adds two files to the debugfs system: /debugfs/tracing/available_filter_functions and /debugfs/tracing/set_ftrace_filter The available_filter_functions lists all functions that has been recorded by the ftraced that has called the ftrace_record_ip function. This is to allow users to see what functions have been converted to nops and can be enabled for tracing. To enable functions, simply echo the names (whitespace delimited) into set_ftrace_filter. Simple wildcards are also allowed. echo 'scheduler' > /debugfs/tracing/set_ftrace_filter Will have only the scheduler be activated when tracing is enabled. echo 'sched_*' > /debugfs/tracing/set_ftrace_filter Will have only the functions starting with 'sched_' be activated. echo '*lock' > /debugfs/tracing/set_ftrace_filter Will have only functions ending with 'lock' be activated. echo '*lock*' > /debugfs/tracing/set_ftrace_filter Will have only functions with 'lock' in its name be activated. Note: 'sched*lock' will not work. The only wildcards that are allowed is an asterisk and the beginning and or end of the string passed in. Multiple names can be passed in with whitespace delimited: echo 'scheduler *lock *acpi*' > /debugfs/tracing/set_ftrace_filter is also the same as: echo 'scheduler' > /debugfs/tracing/set_ftrace_filter echo '*lock' >> /debugfs/tracing/set_ftrace_filter echo '*acpi*' >> /debugfs/tracing/set_ftrace_filter Appending does just that. It appends to the list. To disable all filters simply echo an empty line in: echo > /debugfs/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b0dd0093058f..f5911d2d42c3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,7 +43,9 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f5911d2d42c3..a42390c1d6e1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -68,6 +68,13 @@ extern void ftrace_call(void); extern void mcount_call(void); #endif +static inline void tracer_disable(void) +{ +#ifdef CONFIG_FTRACE + ftrace_enabled = 0; +#endif +} + #ifdef CONFIG_FRAME_POINTER /* TODO: need to fix this for ARM */ # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -- cgit v1.2.1 From e1c08bdd9fa73e44096e5a82c0d5928b04ab02c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: force recording Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a42390c1d6e1..2c1670c65236 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -54,6 +54,8 @@ struct dyn_ftrace { unsigned long flags; }; +int ftrace_force_update(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -66,6 +68,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#else +# define ftrace_force_update() do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.1 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c65236..953a36d6a199 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) -- cgit v1.2.1 From 77a2b37d227483fe52aead242652aee406c25bf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: startup tester on dynamic tracing. This patch adds a startup self test on dynamic code modification and filters. The test filters on a specific function, makes sure that no other function is traced, exectutes the function, then makes sure that the function is traced. This patch also fixes a slight bug with the ftrace selftest, where tracer_enabled was not being set. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 953a36d6a199..a842d96c6343 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -55,6 +55,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); @@ -70,6 +71,7 @@ extern void ftrace_call(void); extern void mcount_call(void); #else # define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.1 From 37ad508419f0fdfda7b378756eb1f35cfd26d96d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace - fix dynamic ftrace memory leak The ftrace dynamic function update allocates a record to store the instruction pointers that are being modified. If the modified instruction pointer fails to update, then the record is marked as failed and nothing more is done. Worse, if the modification fails, but the record ip function is still called, it will allocate a new record and try again. In just a matter of time, will this cause a serious memory leak and crash the system. This patch plugs this memory leak. When a record fails, it is included back into the pool of records to be used. Now a record may fail over and over again, but the number of allocated records will not increase. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a842d96c6343..61e757bd2350 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,10 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: disable tracing on failure Since ftrace touches practically every function. If we detect any anomaly, we want to fully disable ftrace. This patch adds code to try shutdown ftrace as much as possible without doing any more harm is something is detected not quite correct. This only kills ftrace, this patch does have checks for other parts of the tracer (irqsoff, wakeup, etc.). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 61e757bd2350..4650a3160b7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,6 +58,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); -- cgit v1.2.1 From aeaee8a2c9cb4489f166ca0e39c568e8254faaa6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4650a3160b7f..08fbef1744cc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); -/* totally disable ftrace - can not re-enable after this */ -void ftrace_kill(void); - /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -74,10 +71,13 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + static inline void tracer_disable(void) { #ifdef CONFIG_FTRACE -- cgit v1.2.1 From 86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1744cc..0d3714e7110b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER -- cgit v1.2.1 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1fdbfe2..05744f9cb096 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.1 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb096..652d380ae563 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); +extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif -- cgit v1.2.1 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae563..a3970b563757 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); -- cgit v1.2.1 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b563757..5b186bed54bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.1 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54bc..360ca99033d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.1 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start kill off all tasks. This patch adds an algorith to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc288..bd91987c065f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.1 From dc102a8fae2d0d6bf5223fc549247f2e23959ae6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:09 +0200 Subject: Markers - remove extra format argument Denys Vlasenko : > Not in this patch, but I noticed: > > #define __trace_mark(name, call_private, format, args...) \ > do { \ > static const char __mstrtab_##name[] \ > __attribute__((section("__markers_strings"))) \ > = #name "\0" format; \ > static struct marker __mark_##name \ > __attribute__((section("__markers"), aligned(8))) = \ > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ > 0, 0, marker_probe_cb, \ > { __mark_empty_function, NULL}, NULL }; \ > __mark_check_format(format, ## args); \ > if (unlikely(__mark_##name.state)) { \ > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > } \ > } while (0) > > In this call: > > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > > you make gcc allocate duplicate format string. You can use > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string, > or drop ", format," above and "const char *fmt" from here: > > void (*call)(const struct marker *mdata, /* Probe wrapper */ > void *call_private, const char *fmt, ...); > > since mdata->format is the same and all callees which need it can take it there. Very good point. I actually thought about dropping it, since it would remove an unnecessary argument from the stack. And actually, since I now have the marker_probe_cb sitting between the marker site and the callbacks, there is no API change required. Thanks :) Mathieu Signed-off-by: Mathieu Desnoyers CC: Denys Vlasenko Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6adf9762..338533abb47b 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -44,8 +44,8 @@ struct marker { */ char state; /* Marker state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -72,8 +72,7 @@ struct marker { __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + (&__mark_##name, call_private, ## args);\ } \ } while (0) @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. -- cgit v1.2.1 From 0aa977f592f17004f9d1d545f2e1bb9ea71896c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Markers - define non optimized marker To support the forthcoming "immediate values" marker optimization, we must have a way to declare markers in few code paths that does not use instruction modification based enable. This will be the case of printk(), some traps and eventually lockdep instrumentation. Changelog : - Fix reversed boolean logic of "generic". Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 338533abb47b..1290653f9241 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -58,8 +58,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -79,7 +83,7 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -87,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. -- cgit v1.2.1 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99033d2..c0b1c69b55ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.1 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e7110b..017ab44d572a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.1 From d49dbf33f0bf8748ee3662b973eb57e60525d622 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2008 10:41:53 +0200 Subject: ftrace: fix include file dependency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 017ab44d572a..911d5d80b49f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -4,6 +4,7 @@ #ifdef CONFIG_FTRACE #include +#include extern int ftrace_enabled; extern int -- cgit v1.2.1 From 489f139614596cbc956a06f5e4bb41288e276fe3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Feb 2008 13:38:05 +0100 Subject: ftrace: fix build bug Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 911d5d80b49f..922e23d0196f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,16 +106,16 @@ static inline void tracer_disable(void) #endif #ifdef CONFIG_IRQSOFF_TRACER - extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); - extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else # define time_hardirqs_on(a0, a1) do { } while (0) # define time_hardirqs_off(a0, a1) do { } while (0) #endif #ifdef CONFIG_PREEMPT_TRACER - extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); - extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); + extern void trace_preempt_on(unsigned long a0, unsigned long a1); + extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else # define trace_preempt_on(a0, a1) do { } while (0) # define trace_preempt_off(a0, a1) do { } while (0) -- cgit v1.2.1 From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with with e.g. kprobes. This patch depends on the custom page fault handlers patch. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/mmiotrace.h (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 000000000000..cb247825f3ec --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. + */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ -- cgit v1.2.1 From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb247825f3ec..6ec288f1fe24 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ -- cgit v1.2.1 From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f1fe24..d87a6cd8b686 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. -- cgit v1.2.1 From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd8b686..cb5efd0c7f51 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ -- cgit v1.2.1 From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0c7f51..579b3b06c90e 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ -- cgit v1.2.1 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowning when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 85 ++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90e..c88a9c197d22 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space. - */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ -- cgit v1.2.1 From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c197d22..dd6b64b160fc 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); -- cgit v1.2.1 From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b160fc..de8e91258da7 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ -- cgit v1.2.1 From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e91258da7..5cbbc374e945 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; -- cgit v1.2.1 From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: rename kmmio_probe::user_data to :private. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc374e945..61d19e1b7a0b 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.1 From 42fdfa238a23643226910acf922ea930b3286032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:14:51 +0200 Subject: namespacecheck: more kernel/printk.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779b..f2a668c195bf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) { return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } -- cgit v1.2.1 From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: move tracedata to RODATA .. allowing it to be write-protected just as other read-only data under CONFIG_DEBUG_RODATA. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/resume-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28c6960..c9ba2fdf807d 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v1.2.1 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since them. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R. Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26c..a86186dd0474 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -285,12 +285,11 @@ enum struct softirq_action { void (*action)(struct softirq_action *); - void *data; }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); -extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); +extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); -- cgit v1.2.1 From e9197bf0114661195bee35e7795cfc42164d9b2c Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:10 -0700 Subject: x86 boot: remove some unused extern function declarations Remove three extern declarations for routines that don't exist. Fix a typo in a comment. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a7ad0e..807373d467f7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.1 From c801ed3860fe2f84281d4cae4c3e6ca87e81e890 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:23 -0700 Subject: x86 boot: simplify pageblock_bits enum declaration The use of #defines with '##' pre-processor concatenation is a useful way to form several symbol names with a common pattern. But when there is just a single name obtained from that #define, it's just obfuscation. Better to just write the plain symbol name, as is. The following patch is a result of my wasting ten minutes looking through the kernel to figure out what 'PB_migrate_end' meant, and forgetting what I came to do, by the time I figured out that the #define PB_range macro defined it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/pageblock-flags.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905f7b12..e8c06122be36 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; -- cgit v1.2.1 From 41c52c0db9607e59f90da7da5309489fa06e887f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:46:33 -0400 Subject: ftrace: set_ftrace_notrace feature While debugging latencies in the RT kernel, I found that it would be nice to be able to filter away functions from the trace than just to filter on functions. I added a new interface to the debugfs tracing directory called set_ftrace_notrace When dynamic frace is enabled, this lets you filter away functions that will not be recorded in the trace. It is similar to adding 'notrace' to those functions but by doing it without recompiling the kernel. Here's how set_ftrace_filter and set_ftrace_notrace interact. Remember, if set_ftrace_filter is set, it removes all functions from the trace execpt for those listed in the set_ftrace_filter. set_ftrace_notrace will prevent those functions from being traced. If you were to set one function in both set_ftrace_filter and set_ftrace_notrace and that function was the same, then you would end up with an empty trace. the set of functions to trace is: set_ftrace_filter == empty then all functions not in set_ftrace_notrace else set of the set_ftrace_filter and not in set of set_ftrace_notrace. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 922e23d0196f..ffbbd54a720e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -48,6 +48,7 @@ enum { FTRACE_FL_FAILED = (1 << 1), FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), + FTRACE_FL_NOTRACE = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.1 From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over. Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fbdf549..248e6e3b9b73 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); -- cgit v1.2.1 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415d..02955a1c3d76 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) -- cgit v1.2.1 From b1829d2705daa7cb72eb1e08bdc8b7e9fad34266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 01:22:08 +0200 Subject: ftrace: fix merge Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ffbbd54a720e..b482fe88bc04 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -122,7 +122,7 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER +#ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else -- cgit v1.2.1 From ad90c0e3ce8d20d6873b57e36181ef6d7a0097fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2008 20:48:37 -0400 Subject: ftrace: user update and disable dynamic ftrace daemon In dynamic ftrace, the mcount function starts off pointing to a stub function that just returns. On start up, the call to the stub is modified to point to a "record_ip" function. The job of the record_ip function is to add the function to a pre-allocated hash list. If the function is already there, it simply is ignored, otherwise it is added to the list. Later, a ftraced daemon wakes up and calls kstop_machine if any functions have been recorded, and changes the calls to the recorded functions to a simple nop. If no functions were recorded, the daemon goes back to sleep. The daemon wakes up once a second to see if it needs to update any newly recorded functions into nops. Usually it does not, but if a lot of code has been executed for the first time in the kernel, the ftraced daemon will call kstop_machine to update those into nops. The problem currently is that there's no way to stop the daemon from doing this, and it can cause unneeded latencies (800us which for some is bothersome). This patch adds a new file /debugfs/tracing/ftraced_enabled. If the daemon is active, reading this will return "enabled\n" and "disabled\n" when the daemon is not running. To disable the daemon, the user can echo "0" or "disable" into this file, and "1" or "enable" to re-enable the daemon. Since the daemon is used to convert the functions into nops to increase the performance of the system, I also added that anytime something is written into the ftraced_enabled file, kstop_machine will run if there are new functions that have been detected that need to be converted. This way the user can disable the daemon but still be able to control the conversion of the mcount calls to nops by simply, "echo 0 > /debugfs/tracing/ftraced_enabled" when they need to do more conversions. To see the number of converted functions: "cat /debugfs/tracing/dyn_ftrace_total_info" Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b482fe88bc04..623819433ed5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,9 +72,15 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +void ftrace_disable_daemon(void); +void ftrace_enable_daemon(void); + #else # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_disable_daemon() do { } while (0) +# define ftrace_enable_daemon() do { } while (0) #endif /* totally disable ftrace - can not re-enable after this */ -- cgit v1.2.1 From 554ec22f075d46e4363520a407d2b7eeb5dfdd43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:03 +0200 Subject: namespacecheck: more sched.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ae0be3c62375..dc36c3aea018 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,7 +134,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern unsigned long weighted_cpuload(const int cpu); struct seq_file; struct cfs_rq; @@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -/* - * A runqueue laden with a single nice 0 task scores a weighted_cpuload of - * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a - * task of nice 0 or enough lower priority tasks to bring up the - * weighted_cpuload - */ -static inline int above_background_load(void) -{ - unsigned long cpu; - - for_each_online_cpu(cpu) { - if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) - return 1; - } - return 0; -} - struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -- cgit v1.2.1 From c7aceaba042702538b23cf4e0de1b2891ad8e671 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 15 May 2008 12:09:15 +0100 Subject: sched: reorder task_struct to reduce padding on 64bit builds This patch removes 24 bytes of padding and allows 1 extra object per slab on my fedora based config. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc36c3aea018..ea2857b99596 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,7 @@ struct task_struct { #endif int prio, static_prio, normal_prio; + unsigned int rt_priority; const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; @@ -1104,7 +1105,6 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - unsigned int rt_priority; cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; cputime_t prev_utime, prev_stime; @@ -1123,12 +1123,12 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - unsigned securebits; struct user_struct *user; + unsigned securebits; #ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested keys to */ struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ - unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1215,8 +1215,8 @@ struct task_struct { # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; - struct held_lock held_locks[MAX_LOCK_DEPTH]; unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif /* journalling filesystem info */ @@ -1244,10 +1244,6 @@ struct task_struct { u64 acct_vm_mem1; /* accumulated virtual memory usage */ cputime_t acct_stimexpd;/* stime since last update */ #endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; - short il_next; -#endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; int cpuset_mems_generation; @@ -1266,6 +1262,10 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; +#endif +#ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; -- cgit v1.2.1 From 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 4 Jun 2008 15:04:05 -0400 Subject: sched: fix cpupri hotplug support The RT folks over at RedHat found an issue w.r.t. hotplug support which was traced to problems with the cpupri infrastructure in the scheduler: https://bugzilla.redhat.com/show_bug.cgi?id=449676 This bug affects 23-rt12+, 24-rtX, 25-rtX, and sched-devel. This patch applies to 25.4-rt4, though it should trivially apply to most cpupri enabled kernels mentioned above. It turned out that the issue was that offline cpus could get inadvertently registered with cpupri so that they were erroneously selected during migration decisions. The end result would be an OOPS as the offline cpu had tasks routed to it. This patch generalizes the old join/leave domain interface into an online/offline interface, and adjusts the root-domain/hotplug code to utilize it. I was able to easily reproduce the issue prior to this patch, and am no longer able to reproduce it after this patch. I can offline cpus indefinately and everything seems to be in working order. Thanks to Arnaldo (acme), Thomas, and Peter for doing the legwork to point me in the right direction. Also thank you to Peter for reviewing the early iterations of this patch. Signed-off-by: Gregory Haskins Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea2857b99596..d25acf600a32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -903,8 +903,8 @@ struct sched_class { void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); - void (*join_domain)(struct rq *rq); - void (*leave_domain)(struct rq *rq); + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); -- cgit v1.2.1 From cc1a9d86ce989083703c4bdc11b75a87e1cc404a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Jun 2008 19:39:16 -0700 Subject: mm, x86: shrink_active_range() should check all Now we are using register_e820_active_regions() instead of add_active_range() directly. So end_pfn could be different between the value in early_node_map to node_end_pfn. So we need to make shrink_active_range() smarter. shrink_active_range() is a generic MM function in mm/page_alloc.c but it is only used on 32-bit x86. Should we move it back to some file in arch/x86? Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30e..7cbd949f2516 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -997,8 +997,7 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn); +extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.1 From 0eb967012ea15e6e8cfab483d9fa37bc602d400c Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sun, 1 Jun 2008 21:47:30 +0530 Subject: ftrace: prevent freeing of all failed updates Prevent freeing of records which cause problems and correspond to function from core kernel text. A new flag, FTRACE_FL_CONVERTED is used to mark a record as "converted". All other records are patched lazily to NOPs. Failed records now also remain on frace_hash table. Each invocation of ftrace_record_ip now checks whether the traced function has ever been recorded (including past failures) and doesn't re-record it again. Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 623819433ed5..20e14d0093c7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), + FTRACE_FL_CONVERTED = (1 << 5), }; struct dyn_ftrace { -- cgit v1.2.1 From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Jun 2008 12:57:11 -0700 Subject: sched: prevent bound kthreads from changing cpus_allowed Kthreads that have called kthread_bind() are bound to specific cpus, so other tasks should not be able to change their cpus_allowed from under them. Otherwise, it is possible to move kthreads, such as the migration or software watchdog threads, so they are not allowed access to the cpu they work on. Cc: Peter Zijlstra Cc: Paul Menage Cc: Paul Jackson Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25acf600a32..2db1485f865d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1486,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.1 From e17ba73b0ee6c0f24393c48b455e0d8db761782c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 12 May 2008 15:44:40 +0200 Subject: x86, generic: mark early_printk as asmlinkage It's not explicitly marked as asmlinkage, but invoked from x86_32 startup code with parameters on stack. No other architectures define early_printk and none of them are affected by this change, since defines asmlinkage as empty token. Signed-off-by: Jiri Slaby Cc: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2a668c195bf..4cb8d3df414e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -207,7 +207,7 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ { return false; } #endif -extern void __attribute__((format(printf, 1, 2))) +extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); unsigned long int_sqrt(unsigned long); -- cgit v1.2.1 From 443cd507ce7f78c6f8742b72736585c031d5a921 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 20 Jun 2008 16:39:21 +0800 Subject: lockdep: add lock_class information to lock_chain and output it This patch records array of lock_class into lock_chain, and export lock_chain information via /proc/lockdep_chains. It is based on x86/master branch of git-x86 tree, and has been tested on x86_64 platform. Signed-off-by: Huang Ying Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4c4d236ded18..b26fbc715a50 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -182,6 +182,9 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { + u8 irq_context; + u8 depth; + u16 base; struct list_head entry; u64 chain_key; }; -- cgit v1.2.1 From 0b2806768899dba5967bcd4a3b93eaed9a1dc4f3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 18 May 2008 14:27:41 -0600 Subject: Add cycle_kernel_lock() A number of driver functions are so obviously trivial that they do not need the big kernel lock - at least not overtly. It turns out that the acquisition of the BKL in driver open() functions can perform a sort of poor-hacker's serialization function, delaying the open operation until the driver is certain to have completed its initialization. Add a simple cycle_kernel_lock() function for these cases to make it clear that there is no need to *hold* the BKL, just to be sure that we can acquire it. Signed-off-by: Jonathan Corbet --- include/linux/smp_lock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index aab3a4cff4e1..813be59bf345 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -27,11 +27,24 @@ static inline int reacquire_kernel_lock(struct task_struct *task) extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +/* + * Various legacy drivers don't really need the BKL in a specific + * function, but they *do* need to know that the BKL became available. + * This function just avoids wrapping a bunch of lock/unlock pairs + * around code which doesn't really need it. + */ +static inline void cycle_kernel_lock(void) +{ + lock_kernel(); + unlock_kernel(); +} + #else #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) #define release_kernel_lock(task) do { } while(0) +#define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 #define kernel_locked() 1 -- cgit v1.2.1 From 395a59d0f8e86bb39cd700c3d185d30c670bb958 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:27 +0530 Subject: ftrace: store mcount address in rec->ip Record the address of the mcount call-site. Currently all archs except sparc64 record the address of the instruction following the mcount call-site. Some general cleanups are entailed. Storing mcount addresses in rec->ip enables looking them up in the kprobe hash table later on to check if they're kprobe'd. Signed-off-by: Abhishek Sagar Cc: davem@davemloft.net Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 20e14d0093c7..366098d591de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,7 +31,6 @@ int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -extern void mcount(void); #else /* !CONFIG_FTRACE */ # define register_ftrace_function(ops) do { } while (0) @@ -54,7 +53,7 @@ enum { struct dyn_ftrace { struct hlist_node node; - unsigned long ip; + unsigned long ip; /* address of mcount call-site */ unsigned long flags; }; -- cgit v1.2.1 From 785656a41f9a9c0e843a23d1ae05d900b5158f8f Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:39 +0530 Subject: kprobes: enable clean usage of get_kprobe Allow clean use of get_kprobe() outside of core kprobe code. Ftrace makes use of get_kprobe to identify probes installed on mcount call-sites. Signed-off-by: Abhishek Sagar Acked-by: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: jkenisto@us.ibm.com Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1036631ff4fa..04a3556bdea6 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); struct jprobe; struct kretprobe; +static inline struct kprobe *get_kprobe(void *addr) +{ + return NULL; +} static inline struct kprobe *kprobe_running(void) { return NULL; -- cgit v1.2.1 From ecea656d1d5e912d2f3d332657ea4a6d8380f891 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:53 +0530 Subject: ftrace: freeze kprobe'd records Let records identified as being kprobe'd be marked as "frozen". The trouble with records which have a kprobe installed on their mcount call-site is that they don't get updated. So if such a function which is currently being traced gets its tracing disabled due to a new filter rule (or because it was added to the notrace list) then it won't be updated and continue being traced. This patch allows scanning of all frozen records during tracing to check if they should be traced. Signed-off-by: Abhishek Sagar Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366098d591de..3121b95443d9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { @@ -73,15 +74,18 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +extern int skip_trace(unsigned long ip); + void ftrace_disable_daemon(void); void ftrace_enable_daemon(void); #else +# define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -- cgit v1.2.1 From 3da757daf86e498872855f0b5e101f763ba79499 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Jun 2008 15:06:33 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation On the x86 platform we can use the value of tsc_khz computed during tsc calibration to calculate the loops_per_jiffy value. Its very important to keep the error in lpj values to minimum as any error in that may result in kernel panic in check_timer. In virtualization environment, On a highly overloaded host the guest delay calibration may sometimes result in errors beyond the ~50% that timer_irq_works can handle, resulting in the guest panicking. Does some formating changes to lpj_setup code to now have a single printk to print the bogomips value. We do this only for the boot processor because the AP's can have different base frequencies or the BIOS might boot a AP at a different frequency. Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 54552d21296e..01aec60590ab 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif +extern unsigned long lpj_tsc; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.1 From 961ccddd59d627b89bd3dc284b6517833bbdf25d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 23 Jun 2008 13:55:38 +1000 Subject: sched: add new API sched_setscheduler_nocheck: add a flag to control access checks Hidehiro Kawai noticed that sched_setscheduler() can fail in stop_machine: it calls sched_setscheduler() from insmod, which can have CAP_SYS_MODULE without CAP_SYS_NICE. Two cases could have failed, so are changed to sched_setscheduler_nocheck: kernel/softirq.c:cpu_callback() - CPU hotplug callback kernel/stop_machine.c:__stop_machine_run() - Called from various places, including modprobe() Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Hidehiro Kawai Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: sugita Cc: Satoshi OSHIMA Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..fe3b9b5d7390 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1655,6 +1655,8 @@ extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, + struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); -- cgit v1.2.1 From a033c332e047397904ed74816946b2edd9b0d5cd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Jun 2008 10:52:42 +0800 Subject: lockdep: remove duplicate definition of STATIC_LOCKDEP_MAP_INIT STATIC_LOCKDEP_MAP_INIT is defined twice in lockdep.h. I guess it's a copy & paste. Signed-off-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b26fbc715a50..2486eb4edbf1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -278,14 +278,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, lockdep_init_map(&(lock)->dep_map, #lock, \ (lock)->dep_map.key, sub) -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - - /* * Acquire a lock. * -- cgit v1.2.1 From f3f3149f35b9195ef4b761b1353fc0766b5f53be Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 23 Jun 2008 18:21:56 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation, cleanup As suggested by Ingo, remove all references to tsc from init/calibrate.c TSC is x86 specific, and using tsc in variable names in a generic file should be avoided. lpj_tsc is now called lpj_fine, since it is related to fine tuning of lpj value. Also tsc_rate_* is called timer_rate_* Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 01aec60590ab..fd832c6d419e 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,7 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif -extern unsigned long lpj_tsc; +extern unsigned long lpj_fine; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.1 From c09595f63bb1909c5dc4dca288f4fe818561b5f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:14 +0200 Subject: sched: revert revert of: fair-group: SMP-nice for group scheduling Try again.. Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..97a58b622ee1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,6 +765,7 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.1 From b6a86c746f5b708012809958462234d19e9c8177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:18 +0200 Subject: sched: fix sched_domain aggregation Keeping the aggregate on the first cpu of the sched domain has two problems: - it could collide between different sched domains on different cpus - it could slow things down because of the remote accesses Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 97a58b622ee1..eaf821072dbd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,7 +765,6 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ - int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.1 From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:35 +0200 Subject: sched: update shares on wakeup We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072dbd..835b6c6fcc56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -783,6 +783,8 @@ struct sched_domain { unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ + u64 last_update; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_shares_ratelimit; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.1 From 9465efc9e96135a2cec8154c0c766fa59984a298 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Jun 2008 11:05:24 +0200 Subject: Remove BKL from remote_llseek v2 - Replace remote_llseek with generic_file_llseek_unlocked (to force compilation failures in all users) - Change all users to either use generic_file_llseek_unlocked directly or take the BKL around. I changed the file systems who don't use the BKL for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS take the BKL, but explicitely in their own source now. I moved them all over in a single patch to avoid unbisectable sections. Open problem: 32bit kernels can corrupt fpos because its modification is not atomic, but they can do that anyways because there's other paths who modify it without BKL. Do we need a special lock for the pos/f_version = 0 checks? Trond says the NFS BKL is likely not needed, but keep it for now until his full audit. v2: Use generic_file_llseek_unlocked instead of remote_llseek_unlocked and factor duplicated code (suggested by hch) Cc: Trond.Myklebust@netapp.com Cc: swhiteho@redhat.com Cc: sfrench@samba.org Cc: vandrove@vc.cvut.cz Signed-off-by: Andi Kleen Signed-off-by: Andi Kleen Signed-off-by: Jonathan Corbet --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f413085f748e..b158e5161bca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1871,7 +1871,8 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, + int origin); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.1 From 02c62304e6af60f1963695c6bc1bbffe619aa585 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Wed, 11 Jun 2008 09:12:52 +0200 Subject: Added in user-injected messages into blk traces This allows a user to annotate the blk trace stream: writing a suitable message to {/sys/kernel/debug}/block//msg will have it propagated into the trace stream. Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index e3ef903aae88..d084b8d227a5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -129,6 +129,7 @@ struct blk_trace { u32 dev; struct dentry *dir; struct dentry *dropped_file; + struct dentry *msg_file; atomic_t dropped; }; -- cgit v1.2.1 From 244b4d56f85bcd11b21ab0b94845a3dabeed5c10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 12 Jun 2008 20:12:36 +0200 Subject: block: kill request_queue_t Everything was moved to struct request_queue a few kernel revisions ago, maintaining the deprecated typedef to avoid breaking things. Now the time has come to get rid of that typedef. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2a1b71e93c3..6a3da6717135 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -23,7 +23,6 @@ struct scsi_ioctl_command; struct request_queue; -typedef struct request_queue request_queue_t __deprecated; struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; -- cgit v1.2.1 From 51d654e1d885607a6edd02b337105fa5c28b6d33 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 17 Jun 2008 18:59:56 +0200 Subject: block: Globalize bio_set and bio_vec_slab Move struct bio_set and biovec_slab definitions to bio.h so they can be used outside of bio.c. Signed-off-by: Martin K. Petersen Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/bio.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 61c15eaf3fb3..49dfb3cb7460 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -333,6 +333,35 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, int, int); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); +extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); + +/* + * bio_set is used to allow other portions of the IO system to + * allocate their own private memory pools for bio and iovec structures. + * These memory pools in turn all allocate from the bio_slab + * and the bvec_slabs[]. + */ +#define BIO_POOL_SIZE 2 +#define BIOVEC_NR_POOLS 6 + +struct bio_set { + mempool_t *bio_pool; + mempool_t *bvec_pools[BIOVEC_NR_POOLS]; +}; + +struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +}; + +extern struct bio_set *fs_bio_set; + +/* + * a small number of entries is fine, not going to be performance critical. + * basically we just need to survive + */ +#define BIO_SPLIT_ENTRIES 2 #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.1 From 7ba1ba12eeef0aa7113beb16410ef8b7c748e18b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 30 Jun 2008 20:04:41 +0200 Subject: block: Block layer data integrity support Some block devices support verifying the integrity of requests by way of checksums or other protection information that is submitted along with the I/O. This patch implements support for generating and verifying integrity metadata, as well as correctly merging, splitting and cloning bios and requests that have this extra information attached. See Documentation/block/data-integrity.txt for more information. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 94 +++++++++++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 3 ++ 3 files changed, 198 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 49dfb3cb7460..6bfc3e8d9d89 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -64,6 +64,7 @@ struct bio_vec { struct bio_set; struct bio; +struct bio_integrity_payload; typedef void (bio_end_io_t) (struct bio *, int); typedef void (bio_destructor_t) (struct bio *); @@ -112,6 +113,9 @@ struct bio { atomic_t bi_cnt; /* pin count */ void *bi_private; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; /* data integrity */ +#endif bio_destructor_t *bi_destructor; /* destructor */ }; @@ -271,6 +275,29 @@ static inline void *bio_data(struct bio *bio) */ #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +#if defined(CONFIG_BLK_DEV_INTEGRITY) +/* + * bio integrity payload + */ +struct bio_integrity_payload { + struct bio *bip_bio; /* parent bio */ + struct bio_vec *bip_vec; /* integrity data vector */ + + sector_t bip_sector; /* virtual start sector */ + + void *bip_buf; /* generated integrity data */ + bio_end_io_t *bip_end_io; /* saved I/O completion fn */ + + int bip_error; /* saved I/O error */ + unsigned int bip_size; + + unsigned short bip_pool; /* pool the ivec came from */ + unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_idx; /* current bip_vec index */ + + struct work_struct bip_work; /* I/O completion */ +}; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * A bio_pair is used when we need to split a bio. @@ -283,10 +310,14 @@ static inline void *bio_data(struct bio *bio) * in bio2.bi_private */ struct bio_pair { - struct bio bio1, bio2; - struct bio_vec bv1, bv2; - atomic_t cnt; - int error; + struct bio bio1, bio2; + struct bio_vec bv1, bv2; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload bip1, bip2; + struct bio_vec iv1, iv2; +#endif + atomic_t cnt; + int error; }; extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors); @@ -334,6 +365,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); +extern unsigned int bvec_nr_vecs(unsigned short idx); /* * bio_set is used to allow other portions of the IO system to @@ -346,6 +378,9 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set struct bio_set { mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif mempool_t *bvec_pools[BIOVEC_NR_POOLS]; }; @@ -410,5 +445,56 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) +#define bip_vec(bip) bip_vec_idx(bip, 0) + +#define __bip_for_each_vec(bvl, bip, i, start_idx) \ + for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \ + i < (bip)->bip_vcnt; \ + bvl++, i++) + +#define bip_for_each_vec(bvl, bip, i) \ + __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) + +#define bio_integrity(bio) ((bio)->bi_integrity ? 1 : 0) + +extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); +extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); +extern void bio_integrity_free(struct bio *, struct bio_set *); +extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); +extern int bio_integrity_enabled(struct bio *bio); +extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_prep(struct bio *); +extern void bio_integrity_endio(struct bio *, int); +extern void bio_integrity_advance(struct bio *, unsigned int); +extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_split(struct bio *, struct bio_pair *, int); +extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *); +extern int bioset_integrity_create(struct bio_set *, int); +extern void bioset_integrity_free(struct bio_set *); +extern void bio_integrity_init_slab(void); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define bio_integrity(a) (0) +#define bioset_integrity_create(a, b) (0) +#define bio_integrity_prep(a) (0) +#define bio_integrity_enabled(a) (0) +#define bio_integrity_clone(a, b, c) (0) +#define bioset_integrity_free(a) do { } while (0) +#define bio_integrity_free(a, b) do { } while (0) +#define bio_integrity_endio(a, b) do { } while (0) +#define bio_integrity_advance(a, b) do { } while (0) +#define bio_integrity_trim(a, b, c) do { } while (0) +#define bio_integrity_split(a, b, c) do { } while (0) +#define bio_integrity_set_tag(a, b, c) do { } while (0) +#define bio_integrity_get_tag(a, b, c) do { } while (0) +#define bio_integrity_init_slab(a) do { } while (0) + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a3da6717135..4a9ed45270ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -112,6 +112,7 @@ enum rq_flag_bits { __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NR_BITS, /* stops here */ }; @@ -134,6 +135,7 @@ enum rq_flag_bits { #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_RW_META (1 << __REQ_RW_META) #define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define BLK_MAX_CDB 16 @@ -865,6 +867,109 @@ void kblockd_flush_work(struct work_struct *work); MODULE_ALIAS("block-major-" __stringify(major) "-*") +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ + +struct blk_integrity_exchg { + void *prot_buf; + void *data_buf; + sector_t sector; + unsigned int data_size; + unsigned short sector_size; + const char *disk_name; +}; + +typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); +typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); +typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); +typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); + +struct blk_integrity { + integrity_gen_fn *generate_fn; + integrity_vrfy_fn *verify_fn; + integrity_set_tag_fn *set_tag_fn; + integrity_get_tag_fn *get_tag_fn; + + unsigned short flags; + unsigned short tuple_size; + unsigned short sector_size; + unsigned short tag_size; + + const char *name; + + struct kobject kobj; +}; + +extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); +extern void blk_integrity_unregister(struct gendisk *); +extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request *); + +static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) +{ + if (bi) + return bi->tuple_size; + + return 0; +} + +static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +{ + return bdev->bd_disk->integrity; +} + +static inline unsigned int bdev_get_tag_size(struct block_device *bdev) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi) + return bi->tag_size; + + return 0; +} + +static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi == NULL) + return 0; + + if (rw == READ && bi->verify_fn != NULL && + test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + return 1; + + if (rw == WRITE && bi->generate_fn != NULL && + test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + return 1; + + return 0; +} + +static inline int blk_integrity_rq(struct request *rq) +{ + BUG_ON(rq->bio == NULL); + + return bio_integrity(rq->bio); +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define blk_integrity_rq(rq) (0) +#define blk_rq_count_integrity_sg(a) (0) +#define blk_rq_map_integrity_sg(a, b) (0) +#define bdev_get_integrity(a) (0) +#define bdev_get_tag_size(a) (0) +#define blk_integrity_compare(a, b) (0) +#define blk_integrity_register(a, b) (0) +#define blk_integrity_unregister(a) do { } while (0); + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + + #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ae7aec3cabee..524ec96f5a23 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -141,6 +141,9 @@ struct gendisk { struct disk_stats dkstats; #endif struct work_struct async_notify; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity *integrity; +#endif }; /* -- cgit v1.2.1 From da9cbc87395308a21465bd25441297bbba0477e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Jun 2008 20:42:08 +0200 Subject: block: blkdev.h cleanup, move iocontext stuff to iocontext.h Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 17 ----------------- include/linux/iocontext.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a9ed45270ff..443df75d2cde 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -33,12 +33,6 @@ struct sg_io_hdr; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ -int put_io_context(struct io_context *ioc); -void exit_io_context(void); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); -void copy_io_context(struct io_context **pdst, struct io_context **psrc); - struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -981,17 +975,6 @@ static inline long nr_blockdev_pages(void) return 0; } -static inline void exit_io_context(void) -{ -} - -struct io_context; -static inline int put_io_context(struct io_context *ioc) -{ - return 1; -} - - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 2b7a1187cb29..08b987bccf89 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -99,4 +99,22 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) return NULL; } +#ifdef CONFIG_BLOCK +int put_io_context(struct io_context *ioc); +void exit_io_context(void); +struct io_context *get_io_context(gfp_t gfp_flags, int node); +struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +void copy_io_context(struct io_context **pdst, struct io_context **psrc); +#else +static inline void exit_io_context(void) +{ +} + +struct io_context; +static inline int put_io_context(struct io_context *ioc) +{ + return 1; +} +#endif + #endif -- cgit v1.2.1 From 6e2401ad6f33de15ff00f78b88159f00a14f3b35 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 18 Jun 2008 10:15:02 +0200 Subject: block: integrity cleanups - No need to check for NULL bio, we'll get an immediate oops anyway. - Make bio_integrity() a proper function. Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blkdev.h | 4 ---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6bfc3e8d9d89..0933a14e6414 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -458,7 +458,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -#define bio_integrity(bio) ((bio)->bi_integrity ? 1 : 0) +static inline int bio_integrity(struct bio *bio) +{ +#if defined(CONFIG_BLK_DEV_INTEGRITY) + return bio->bi_integrity != NULL; +#else + return 0; +#endif +} extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 443df75d2cde..d3ae9ad97213 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,7 +860,6 @@ void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ @@ -945,8 +944,6 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { - BUG_ON(rq->bio == NULL); - return bio_integrity(rq->bio); } @@ -963,7 +960,6 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out -- cgit v1.2.1 From 0b07de85a76e1346e675f0e98437378932473df7 Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Thu, 26 Jun 2008 13:48:27 +0200 Subject: allow userspace to modify scsi command filter on per device basis This patch exports the per-gendisk command filter to user space through sysfs, so it can be changed by the system administrator. All users of the old cmd filter have been converted to use the new one. Original patch from Peter Jones. Signed-off-by: Adel Gadllah Signed-off-by: Peter Jones Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++++++- include/linux/genhd.h | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d3ae9ad97213..a842b776d099 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -671,7 +671,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern int blk_verify_command(unsigned char *, int); extern void blk_unplug(struct request_queue *q); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) @@ -797,6 +796,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, sector_t *); +/* +* command filter functions +*/ +extern int blk_verify_command(struct file *file, unsigned char *cmd); +extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter, + unsigned char *cmd, mode_t *f_mode); +extern int blk_register_filter(struct gendisk *disk); +extern void blk_unregister_filter(struct gendisk *disk); + #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 #define SAFE_MAX_SECTORS 255 diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 524ec96f5a23..e8787417f65a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -110,6 +110,14 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_FAIL 64 +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; struct gendisk { int major; /* major number of driver */ @@ -120,6 +128,7 @@ struct gendisk { struct hd_struct **part; /* [indexed by minor] */ struct block_device_operations *fops; struct request_queue *queue; + struct blk_scsi_cmd_filter cmd_filter; void *private_data; sector_t capacity; -- cgit v1.2.1 From b24498d477a14680fc3bb3ad884fa9fa76a2d237 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Jun 2008 09:12:09 +0200 Subject: block: integrity flags can't use bit ops on unsigned short Just use normal open coded bit operations instead, they need not be atomic. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a842b776d099..7ab8acad5b6e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -870,8 +870,8 @@ void kblockd_flush_work(struct work_struct *work); #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ -#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ +#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ struct blk_integrity_exchg { void *prot_buf; @@ -940,11 +940,11 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) return 0; if (rw == READ && bi->verify_fn != NULL && - test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + (bi->flags & INTEGRITY_FLAG_READ)) return 1; if (rw == WRITE && bi->generate_fn != NULL && - test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + (bi->flags & INTEGRITY_FLAG_WRITE)) return 1; return 0; -- cgit v1.2.1 From cc371e66e340f35eed8dc4651c7c18e754c7fb26 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 3 Jul 2008 09:53:43 +0200 Subject: Add bvec_merge_data to handle stacked devices and ->merge_bvec() When devices are stacked, one device's merge_bvec_fn may need to perform the mapping and then call one or more functions for its underlying devices. The following bio fields are used: bio->bi_sector bio->bi_bdev bio->bi_size bio->bi_rw using bio_data_dir() This patch creates a new struct bvec_merge_data holding a copy of those fields to avoid having to change them directly in the struct bio when going down the stack only to have to change them back again on the way back up. (And then when the bio gets mapped for real, the whole exercise gets repeated, but that's a problem for another day...) Signed-off-by: Alasdair G Kergon Cc: Neil Brown Cc: Milan Broz Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ab8acad5b6e..ff9d0bdf2a16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -254,7 +254,14 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); struct bio_vec; -typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); +struct bvec_merge_data { + struct block_device *bi_bdev; + sector_t bi_sector; + unsigned bi_size; + unsigned long bi_rw; +}; +typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, + struct bio_vec *); typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -- cgit v1.2.1 From e48ec69005f02b70b7ecfde1bc39a599086d16ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jul 2008 13:18:54 +0200 Subject: block: extend queue_flag bitops Add test_and_clear and test_and_set. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ff9d0bdf2a16..e04c4ac8a7cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -428,6 +428,32 @@ static inline void queue_flag_set_unlocked(unsigned int flag, __set_bit(flag, &q->queue_flags); } +static inline int queue_flag_test_and_clear(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (test_bit(flag, &q->queue_flags)) { + __clear_bit(flag, &q->queue_flags); + return 1; + } + + return 0; +} + +static inline int queue_flag_test_and_set(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (!test_bit(flag, &q->queue_flags)) { + __set_bit(flag, &q->queue_flags); + return 0; + } + + return 1; +} + static inline void queue_flag_set(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); -- cgit v1.2.1 From 42796d37da6ef4fd851dc6d5d0387baf7e2b0c3c Mon Sep 17 00:00:00 2001 From: eric miao Date: Mon, 14 Apr 2008 09:35:08 +0100 Subject: [ARM] pxa: add generic PWM backlight driver Patch mostly from Eric Miao, with minor edits by rmk to convert Eric's driver to a generic PWM-based backlight driver. Signed-off-by: eric miao Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/pwm_backlight.h (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h new file mode 100644 index 000000000000..aeeffedbe822 --- /dev/null +++ b/include/linux/pwm_backlight.h @@ -0,0 +1,14 @@ +/* + * Generic PWM backlight driver data - see drivers/video/backlight/pwm_bl.c + */ +#ifndef __LINUX_PWM_BACKLIGHT_H +#define __LINUX_PWM_BACKLIGHT_H + +struct platform_pwm_backlight_data { + int pwm_id; + unsigned int max_brightness; + unsigned int dft_brightness; + unsigned int pwm_period_ns; +}; + +#endif -- cgit v1.2.1 From 3b73125af69f93972625f4b655675f42ca4274eb Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 22 May 2008 14:18:40 +0100 Subject: [ARM] 5044/1: pwm_bl: add init/notify/exit callbacks This allows platform code to manipulate GPIOs and brightness level as needed. Signed-off-by: Philipp Zabel Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index aeeffedbe822..7a9754c96775 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -9,6 +9,9 @@ struct platform_pwm_backlight_data { unsigned int max_brightness; unsigned int dft_brightness; unsigned int pwm_period_ns; + int (*init)(struct device *dev); + int (*notify)(int brightness); + void (*exit)(struct device *dev); }; #endif -- cgit v1.2.1 From 27f8221af406e43b529a5425bc99c9b1e9bdf521 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 4 Jul 2008 09:30:03 +0200 Subject: block: add blk_queue_update_dma_pad This adds blk_queue_update_dma_pad to prevent LLDs from overwriting the dma pad mask wrongly (we added blk_queue_update_dma_alignment due to the same reason). This also converts libata to use blk_queue_update_dma_pad instead of blk_queue_dma_pad. Signed-off-by: FUJITA Tomonori Cc: Tejun Heo Cc: Bartlomiej Zolnierkiewicz Cc: Thomas Bogendoerfer Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e04c4ac8a7cf..1ffd8bfdc4c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -776,6 +776,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); +extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -- cgit v1.2.1 From d2dbf343329dc777d77488743465f7be4245971d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 02:00:56 -0700 Subject: x86: clean up reserve_bootmem_generic() and port it to 32-bit 1. add reserve_bootmem_generic for 32bit 2. change len to unsigned long 3. make early_res_to_bootmem to use it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 686895bacd9d..a1d9b79078ea 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern unsigned long free_all_bootmem(void); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern void *__alloc_bootmem_node(pg_data_t *pgdat, -- cgit v1.2.1 From cc1050bafebfb1d7935331282e948b5016318192 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 19:08:52 -0700 Subject: x86: replace shrink_active_range() with remove_active_range() in case we have kva before ramdisk on a node, we still need to use those ranges. v2: reserve_early kva ram area, in case there are holes in highmem, to avoid those area could be treat as free high pages. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce8e397a61f6..034a3156d2f0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); +extern void remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.1 From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Jun 2008 18:32:52 -0700 Subject: x86, mm: use add_highpages_with_active_regions() for high pages init v2 use early_node_map to init high pages, so we can remove page_is_ram() and page_is_reserved_early() in the big loop with add_one_highpage also remove page_is_reserved_early(), it is not needed anymore. v2: fix the build of other platforms Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 034a3156d2f0..e4de460907c1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); -- cgit v1.2.1 From 3461b0af025251bbc6b3d56c821c6ac2de6f7209 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: remove static boot_cpu_pda array v2 * Remove the boot_cpu_pda array and pointer table from the data section. Allocate the pointer table and array during init. do_boot_cpu() will reallocate the pda in node local memory and if the cpu is being brought up before the bootmem array is released (after_bootmem = 0), then it will free the initial pda. This will happen for all cpus present at system startup. This removes 512k + 32k bytes from the data section. For inclusion into sched-devel/latest tree. Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab01..0ea48a5af823 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,6 +1024,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -- cgit v1.2.1 From a7bf0bd5e6af7fe69342dabf2a3b721f0163469a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 May 2008 15:02:14 +0100 Subject: build: add __page_aligned_data and __page_aligned_bss Making a variable page-aligned by using __attribute__((section(".data.page_aligned"))) is fragile because if sizeof(variable) is not also a multiple of page size, it leaves variables in the remainder of the section unaligned. This patch introduces two new qualifiers, __page_aligned_data and __page_aligned_bss to set the section *and* the alignment of variables. This makes page-aligned variables more robust because the linker will make sure they're aligned properly. Unfortunately it requires *all* page-aligned data to use these macros... Signed-off-by: Ingo Molnar --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f8..9fd1f859021b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,6 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H +#include #include #ifdef __cplusplus @@ -17,6 +18,9 @@ # define asmregparm #endif +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. -- cgit v1.2.1 From d52d53b8a5b258bfaab9223a5e7284fcfdd48577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 20:10:55 -0700 Subject: RFC x86: try to remove arch_get_ram_range want to remove arch_get_ram_range, and use early_node_map instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d647b24041f..cf1cd3a2ed78 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,7 +1011,7 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); -typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -- cgit v1.2.1 From 3c999f142665265afd0fe9190204dd051f17e505 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Jun 2008 16:11:20 -0700 Subject: x86: check command line when CONFIG_X86_MPPARSE is not set, v2 if acpi=off, acpi=noirq and pci=noacpi, we need to disable apic. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7a..0601075d09a1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); +int acpi_mps_check (void); int acpi_numa_init (void); int acpi_table_init (void); @@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void) return 0; } +static inline int acpi_mps_check(void) +{ + return 0; +} + static inline int acpi_check_resource_conflict(struct resource *res) { return 0; -- cgit v1.2.1 From 69ac9cd629ca96e59f34eb4ccd12d00b2c8276a7 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Fri, 27 Jun 2008 13:12:54 +0200 Subject: sysfs: add /sys/firmware/memmap This patch adds /sys/firmware/memmap interface that represents the BIOS (or Firmware) provided memory map. The tree looks like: /sys/firmware/memmap/0/start (hex number) end (hex number) type (string) ... /1/start end type With the following shell snippet one can print the memory map in the same form the kernel prints itself when booting on x86 (the E820 map). --------- 8< -------------------------- #!/bin/sh cd /sys/firmware/memmap for dir in * ; do start=$(cat $dir/start) end=$(cat $dir/end) type=$(cat $dir/type) printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type" done --------- >8 -------------------------- That patch only provides the needed interface: 1. The sysfs interface. 2. The structure and enumeration definition. 3. The function firmware_map_add() and firmware_map_add_early() that should be called from architecture code (E820/EFI, for example) to add the contents to the interface. If the kernel is compiled without CONFIG_FIRMWARE_MEMMAP, the interface does nothing without cluttering the architecture-specific code with #ifdef's. The purpose of the new interface is kexec: While /proc/iomem represents the *used* memory map (e.g. modified via kernel parameters like 'memmap' and 'mem'), the /sys/firmware/memmap tree represents the unmodified memory map provided via the firmware. So kexec can: - use the original memory map for rebooting, - use the /proc/iomem for setting up the ELF core headers for kdump case that should only represent the memory of the system. The patch has been tested on i386 and x86_64. Signed-off-by: Bernhard Walle Acked-by: Greg KH Acked-by: Vivek Goyal Cc: kexec@lists.infradead.org Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- include/linux/firmware-map.h | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/linux/firmware-map.h (limited to 'include/linux') diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h new file mode 100644 index 000000000000..acbdbcc16051 --- /dev/null +++ b/include/linux/firmware-map.h @@ -0,0 +1,74 @@ +/* + * include/linux/firmware-map.h: + * Copyright (C) 2008 SUSE LINUX Products GmbH + * by Bernhard Walle + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef _LINUX_FIRMWARE_MAP_H +#define _LINUX_FIRMWARE_MAP_H + +#include +#include + +/* + * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled + */ +#ifdef CONFIG_FIRMWARE_MEMMAP + +/** + * Adds a firmware mapping entry. This function uses kmalloc() for memory + * allocation. Use firmware_map_add_early() if you want to use the bootmem + * allocator. + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type); + +/** + * Adds a firmware mapping entry. This function uses the bootmem allocator + * for memory allocation. Use firmware_map_add() if you want to use kmalloc(). + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add_early(resource_size_t start, resource_size_t end, + const char *type); + +#else /* CONFIG_FIRMWARE_MEMMAP */ + +static inline int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type) +{ + return 0; +} + +static inline int firmware_map_add_early(resource_size_t start, + resource_size_t end, const char *type) +{ + return 0; +} + +#endif /* CONFIG_FIRMWARE_MEMMAP */ + +#endif /* _LINUX_FIRMWARE_MAP_H */ -- cgit v1.2.1 From 004a403c2e954734090a69aedc7f4f822bdcc142 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 14 May 2008 20:41:47 +0800 Subject: [CRYPTO] hash: Add asynchronous hash support This patch adds asynchronous hash and digest support. Signed-off-by: Loc Ho Signed-off-by: Herbert Xu --- include/linux/crypto.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 425824bd49f3..b6efe569128d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -30,15 +30,17 @@ */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 -#define CRYPTO_ALG_TYPE_DIGEST 0x00000002 -#define CRYPTO_ALG_TYPE_HASH 0x00000003 +#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 +#define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000008 -#define CRYPTO_ALG_TYPE_AEAD 0x00000009 +#define CRYPTO_ALG_TYPE_DIGEST 0x00000008 +#define CRYPTO_ALG_TYPE_HASH 0x00000009 +#define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c #define CRYPTO_ALG_LARVAL 0x00000010 @@ -102,6 +104,7 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; +struct crypto_ahash; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -131,6 +134,18 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; +struct ahash_request { + struct crypto_async_request base; + + void *info; + + unsigned int nbytes; + struct scatterlist *src; + u8 *result; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests @@ -195,6 +210,17 @@ struct ablkcipher_alg { unsigned int ivsize; }; +struct ahash_alg { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; +}; + struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); @@ -272,6 +298,7 @@ struct compress_alg { #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_hash cra_u.hash +#define cra_ahash cra_u.ahash #define cra_compress cra_u.compress struct crypto_alg { @@ -298,6 +325,7 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; + struct ahash_alg ahash; struct compress_alg compress; } cra_u; @@ -383,6 +411,19 @@ struct hash_tfm { unsigned int digestsize; }; +struct ahash_tfm { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; + struct crypto_ahash *base; + unsigned int reqsize; +}; + struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, @@ -397,6 +438,7 @@ struct compress_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash +#define crt_ahash crt_u.ahash #define crt_compress crt_u.compress struct crypto_tfm { @@ -409,6 +451,7 @@ struct crypto_tfm { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; + struct ahash_tfm ahash; struct compress_tfm compress; } crt_u; @@ -441,6 +484,10 @@ struct crypto_hash { struct crypto_tfm base; }; +struct crypto_ahash { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1264,5 +1311,137 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } +static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_ahash *)tfm; +} + +static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + mask &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_AHASH; + mask |= CRYPTO_ALG_TYPE_AHASH_MASK; + + return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_ahash(struct crypto_ahash *tfm) +{ + crypto_free_tfm(crypto_ahash_tfm(tfm)); +} + +static inline unsigned int crypto_ahash_alignmask( + struct crypto_ahash *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); +} + +static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) +{ + return &crypto_ahash_tfm(tfm)->crt_ahash; +} + +static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->digestsize; +} + +static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) +{ + return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); +} + +static inline void crypto_ahash_set_flags(struct crypto_ahash *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline struct crypto_ahash *crypto_ahash_reqtfm( + struct ahash_request *req) +{ + return __crypto_ahash_cast(req->base.tfm); +} + +static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->reqsize; +} + +static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ahash_tfm *crt = crypto_ahash_crt(tfm); + + return crt->setkey(crt->base, key, keylen); +} + +static inline int crypto_ahash_digest(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->digest(req); +} + +static inline void ahash_request_set_tfm(struct ahash_request *req, + struct crypto_ahash *tfm) +{ + req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); +} + +static inline struct ahash_request *ahash_request_alloc( + struct crypto_ahash *tfm, gfp_t gfp) +{ + struct ahash_request *req; + + req = kmalloc(sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm), gfp); + + if (likely(req)) + ahash_request_set_tfm(req, tfm); + + return req; +} + +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree(req); +} + +static inline struct ahash_request *ahash_request_cast( + struct crypto_async_request *req) +{ + return container_of(req, struct ahash_request, base); +} + +static inline void ahash_request_set_callback(struct ahash_request *req, + u32 flags, + crypto_completion_t complete, + void *data) +{ + req->base.complete = complete; + req->base.data = data; + req->base.flags = flags; +} + +static inline void ahash_request_set_crypt(struct ahash_request *req, + struct scatterlist *src, u8 *result, + unsigned int nbytes) +{ + req->src = src; + req->nbytes = nbytes; + req->result = result; +} + #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.1 From 166247f46a9c866e6f7f7d2212be875fb82212a1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Jul 2008 20:54:35 +0800 Subject: crypto: hash - Removed vestigial ahash fields The base field in ahash_tfm appears to have been cut-n-pasted from ablkcipher. It isn't needed here at all. Similarly, the info field in ahash_request also appears to have originated from its cipher counter-part and is vestigial. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b6efe569128d..68ef293644d3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -137,8 +137,6 @@ struct ablkcipher_request { struct ahash_request { struct crypto_async_request base; - void *info; - unsigned int nbytes; struct scatterlist *src; u8 *result; @@ -420,7 +418,6 @@ struct ahash_tfm { unsigned int keylen); unsigned int digestsize; - struct crypto_ahash *base; unsigned int reqsize; }; @@ -1384,7 +1381,7 @@ static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, { struct ahash_tfm *crt = crypto_ahash_crt(tfm); - return crt->setkey(crt->base, key, keylen); + return crt->setkey(tfm, key, keylen); } static inline int crypto_ahash_digest(struct ahash_request *req) @@ -1396,7 +1393,7 @@ static inline int crypto_ahash_digest(struct ahash_request *req) static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { - req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); + req->base.tfm = crypto_ahash_tfm(tfm); } static inline struct ahash_request *ahash_request_alloc( -- cgit v1.2.1 From 18e33e6d5cc0495826f5245777cd267732815e01 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 10 Jul 2008 16:01:22 +0800 Subject: crypto: hash - Move ahash functions into crypto/hash.h All new crypto interfaces should go into individual files as much as possible in order to ensure that crypto.h does not collapse under its own weight. This patch moves the ahash code into crypto/hash.h and crypto/internal/hash.h respectively. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 136 ------------------------------------------------- 1 file changed, 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 68ef293644d3..c43dc47fdf75 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -481,10 +481,6 @@ struct crypto_hash { struct crypto_tfm base; }; -struct crypto_ahash { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1308,137 +1304,5 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } -static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_ahash *)tfm; -} - -static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_AHASH; - mask |= CRYPTO_ALG_TYPE_AHASH_MASK; - - return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) -{ - return &tfm->base; -} - -static inline void crypto_free_ahash(struct crypto_ahash *tfm) -{ - crypto_free_tfm(crypto_ahash_tfm(tfm)); -} - -static inline unsigned int crypto_ahash_alignmask( - struct crypto_ahash *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); -} - -static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) -{ - return &crypto_ahash_tfm(tfm)->crt_ahash; -} - -static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->digestsize; -} - -static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) -{ - return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); -} - -static inline void crypto_ahash_set_flags(struct crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline struct crypto_ahash *crypto_ahash_reqtfm( - struct ahash_request *req) -{ - return __crypto_ahash_cast(req->base.tfm); -} - -static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->reqsize; -} - -static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - struct ahash_tfm *crt = crypto_ahash_crt(tfm); - - return crt->setkey(tfm, key, keylen); -} - -static inline int crypto_ahash_digest(struct ahash_request *req) -{ - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->digest(req); -} - -static inline void ahash_request_set_tfm(struct ahash_request *req, - struct crypto_ahash *tfm) -{ - req->base.tfm = crypto_ahash_tfm(tfm); -} - -static inline struct ahash_request *ahash_request_alloc( - struct crypto_ahash *tfm, gfp_t gfp) -{ - struct ahash_request *req; - - req = kmalloc(sizeof(struct ahash_request) + - crypto_ahash_reqsize(tfm), gfp); - - if (likely(req)) - ahash_request_set_tfm(req, tfm); - - return req; -} - -static inline void ahash_request_free(struct ahash_request *req) -{ - kfree(req); -} - -static inline struct ahash_request *ahash_request_cast( - struct crypto_async_request *req) -{ - return container_of(req, struct ahash_request, base); -} - -static inline void ahash_request_set_callback(struct ahash_request *req, - u32 flags, - crypto_completion_t complete, - void *data) -{ - req->base.complete = complete; - req->base.data = data; - req->base.flags = flags; -} - -static inline void ahash_request_set_crypt(struct ahash_request *req, - struct scatterlist *src, u8 *result, - unsigned int nbytes) -{ - req->src = src; - req->nbytes = nbytes; - req->result = result; -} - #endif /* _LINUX_CRYPTO_H */ -- cgit v1.2.1 From a2bb6a3d85ef3124cd336403a95abc0540d3fbe2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2008 20:58:15 -0400 Subject: ftrace: add ftrace_kill_atomic It has been suggested that I add a way to disable the function tracer on an oops. This code adds a ftrace_kill_atomic. It is not meant to be used in normal situations. It will disable the ftrace tracer, but will not perform the nice shutdown that requires scheduling. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3121b95443d9..f368d041e02d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -89,6 +89,7 @@ void ftrace_enable_daemon(void); /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); +void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -- cgit v1.2.1 From af52a90a14cdaa54ecbfb6e6982abb13466a4b56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Jul 2008 14:16:52 -0400 Subject: sched_clock: stop maximum check on NO HZ Working with ftrace I would get large jumps of 11 millisecs or more with the clock tracer. This killed the latencing timings of ftrace and also caused the irqoff self tests to fail. What was happening is with NO_HZ the idle would stop the jiffy counter and before the jiffy counter was updated the sched_clock would have a bad delta jiffies to compare with the gtod with the maximum. The jiffies would stop and the last sched_tick would record the last gtod. On wakeup, the sched clock update would compare the gtod + delta jiffies (which would be zero) and compare it to the TSC. The TSC would have correctly (with a stable TSC) moved forward several jiffies. But because the jiffies has not been updated yet the clock would be prevented from moving forward because it would appear that the TSC jumped too far ahead. The clock would then virtually stop, until the jiffies are updated. Then the next sched clock update would see that the clock was very much behind since the delta jiffies is now correct. This would then jump the clock forward by several jiffies. This caused ftrace to report several milliseconds of interrupts off latency at every resume from NO_HZ idle. This patch adds hooks into the nohz code to disable the checking of the maximum clock update when nohz is in effect. It resumes the max check when nohz has updated the jiffies again. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8d..33a8f42041fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } -#else + +#ifdef CONFIG_NO_HZ +static inline void sched_clock_tick_stop(int cpu) +{ +} + +static inline void sched_clock_tick_start(int cpu) +{ +} +#endif + +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ extern void sched_clock_init(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#ifdef CONFIG_NO_HZ +extern void sched_clock_tick_stop(int cpu); +extern void sched_clock_tick_start(int cpu); #endif +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu -- cgit v1.2.1 From 006ebb40d3d65338bd74abb03b945f8d60e362bd Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 19 May 2008 08:32:49 -0400 Subject: Security: split proc ptrace checking into read vs. attach Enable security modules to distinguish reading of process state via proc from full ptrace access by renaming ptrace_may_attach to ptrace_may_access and adding a mode argument indicating whether only read access or full attach access is requested. This allows security modules to permit access to reading process state without granting full ptrace access. The base DAC/capability checking remains unchanged. Read access to /proc/pid/mem continues to apply a full ptrace attach check since check_mem_permission() already requires the current task to already be ptracing the target. The other ptrace checks within proc for elements like environ, maps, and fds are changed to pass the read mode instead of attach. In the SELinux case, we model such reading of process state as a reading of a proc file labeled with the target process' label. This enables SELinux policy to permit such reading of process state without permitting control or manipulation of the target process, as there are a number of cases where programs probe for such information via proc but do not need to be able to control the target (e.g. procps, lsof, PolicyKit, ConsoleKit). At present we have to choose between allowing full ptrace in policy (more permissive than required/desired) or breaking functionality (or in some cases just silencing the denials via dontaudit rules but this can hide genuine attacks). This version of the patch incorporates comments from Casey Schaufler (change/replace existing ptrace_may_attach interface, pass access mode), and Chris Wright (provide greater consistency in the checking). Note that like their predecessors __ptrace_may_attach and ptrace_may_attach, the __ptrace_may_access and ptrace_may_access interfaces use different return value conventions from each other (0 or -errno vs. 1 or 0). I retained this difference to avoid any changes to the caller logic but made the difference clearer by changing the latter interface to return a bool rather than an int and by adding a comment about it to ptrace.h for any future callers. Signed-off-by: Stephen Smalley Acked-by: Chris Wright Signed-off-by: James Morris --- include/linux/ptrace.h | 8 ++++++-- include/linux/security.h | 16 +++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index f98501ba557e..c6f5f9dd0cee 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,8 +95,12 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); -extern int ptrace_may_attach(struct task_struct *task); -extern int __ptrace_may_attach(struct task_struct *task); +#define PTRACE_MODE_READ 1 +#define PTRACE_MODE_ATTACH 2 +/* Returns 0 on success, -errno on denial. */ +extern int __ptrace_may_access(struct task_struct *task, unsigned int mode); +/* Returns true on success, false on denial. */ +extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) { diff --git a/include/linux/security.h b/include/linux/security.h index 50737c70e78e..62bd80cb7f87 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -46,7 +46,8 @@ struct audit_krule; */ extern int cap_capable(struct task_struct *tsk, int cap); extern int cap_settime(struct timespec *ts, struct timezone *tz); -extern int cap_ptrace(struct task_struct *parent, struct task_struct *child); +extern int cap_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1170,6 +1171,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * attributes would be changed by the execve. * @parent contains the task_struct structure for parent process. * @child contains the task_struct structure for child process. + * @mode contains the PTRACE_MODE flags indicating the form of access. * Return 0 if permission is granted. * @capget: * Get the @effective, @inheritable, and @permitted capability sets for @@ -1295,7 +1297,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; - int (*ptrace) (struct task_struct *parent, struct task_struct *child); + int (*ptrace) (struct task_struct *parent, struct task_struct *child, + unsigned int mode); int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1573,7 +1576,8 @@ extern struct dentry *securityfs_create_dir(const char *name, struct dentry *par extern void securityfs_remove(struct dentry *dentry); /* Security operations */ -int security_ptrace(struct task_struct *parent, struct task_struct *child); +int security_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, @@ -1755,9 +1759,11 @@ static inline int security_init(void) return 0; } -static inline int security_ptrace(struct task_struct *parent, struct task_struct *child) +static inline int security_ptrace(struct task_struct *parent, + struct task_struct *child, + unsigned int mode) { - return cap_ptrace(parent, child); + return cap_ptrace(parent, child, mode); } static inline int security_capget(struct task_struct *target, -- cgit v1.2.1 From 2069f457848f846cb31149c9aa29b330a6b66d1b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 4 Jul 2008 09:47:13 +1000 Subject: LSM/SELinux: show LSM mount options in /proc/mounts This patch causes SELinux mount options to show up in /proc/mounts. As with other code in the area seq_put errors are ignored. Other LSM's will not have their mount options displayed until they fill in their own security_sb_show_options() function. Signed-off-by: Eric Paris Signed-off-by: Miklos Szeredi Signed-off-by: James Morris --- include/linux/security.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 62bd80cb7f87..c8ad8ec684b4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -80,6 +80,7 @@ struct xfrm_selector; struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; +struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); @@ -1331,6 +1332,7 @@ struct security_operations { void (*sb_free_security) (struct super_block *sb); int (*sb_copy_data) (char *orig, char *copy); int (*sb_kern_mount) (struct super_block *sb, void *data); + int (*sb_show_options) (struct seq_file *m, struct super_block *sb); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1610,6 +1612,7 @@ int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); int security_sb_kern_mount(struct super_block *sb, void *data); +int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1887,6 +1890,12 @@ static inline int security_sb_kern_mount(struct super_block *sb, void *data) return 0; } +static inline int security_sb_show_options(struct seq_file *m, + struct super_block *sb) +{ + return 0; +} + static inline int security_sb_statfs(struct dentry *dentry) { return 0; -- cgit v1.2.1 From b478a9f9889c81e88077d1495daadee64c0af541 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Jul 2008 20:56:04 +0200 Subject: security: remove unused sb_get_mnt_opts hook The sb_get_mnt_opts() hook is unused, and is superseded by the sb_show_options() hook. Signed-off-by: Miklos Szeredi Acked-by: James Morris --- include/linux/security.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c8ad8ec684b4..43c6357568a3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,10 +291,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Update module state after a successful pivot. * @old_path contains the path for the old root. * @new_path contains the path for the new root. - * @sb_get_mnt_opts: - * Get the security relevant mount options used for a superblock - * @sb the superblock to get security mount options from - * @opts binary data structure containing all lsm mount data * @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock * @sb the superblock to set security mount options for @@ -1348,8 +1344,6 @@ struct security_operations { struct path *new_path); void (*sb_post_pivotroot) (struct path *old_path, struct path *new_path); - int (*sb_get_mnt_opts) (const struct super_block *sb, - struct security_mnt_opts *opts); int (*sb_set_mnt_opts) (struct super_block *sb, struct security_mnt_opts *opts); void (*sb_clone_mnt_opts) (const struct super_block *oldsb, @@ -1624,8 +1618,6 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint); int security_sb_pivotroot(struct path *old_path, struct path *new_path); void security_sb_post_pivotroot(struct path *old_path, struct path *new_path); -int security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); @@ -1942,12 +1934,6 @@ static inline int security_sb_pivotroot(struct path *old_path, static inline void security_sb_post_pivotroot(struct path *old_path, struct path *new_path) { } -static inline int security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts) -{ - security_init_mnt_opts(opts); - return 0; -} static inline int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) -- cgit v1.2.1 From 6f0f0fd496333777d53daff21a4e3b28c4d03a6d Mon Sep 17 00:00:00 2001 From: James Morris Date: Thu, 10 Jul 2008 17:02:07 +0900 Subject: security: remove register_security hook The register security hook is no longer required, as the capability module is always registered. LSMs wishing to stack capability as a secondary module should do so explicitly. Signed-off-by: James Morris Acked-by: Stephen Smalley Acked-by: Greg Kroah-Hartman --- include/linux/security.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 43c6357568a3..31c8851ec5d0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1239,11 +1239,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @pages contains the number of pages. * Return 0 if permission is granted. * - * @register_security: - * allow module stacking. - * @name contains the name of the security module being stacked. - * @ops contains a pointer to the struct security_operations of the module to stack. - * * @secid_to_secctx: * Convert secid to security context. * @secid contains the security ID. @@ -1471,10 +1466,6 @@ struct security_operations { int (*netlink_send) (struct sock *sk, struct sk_buff *skb); int (*netlink_recv) (struct sk_buff *skb, int cap); - /* allow module stacking */ - int (*register_security) (const char *name, - struct security_operations *ops); - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); int (*getprocattr) (struct task_struct *p, char *name, char **value); @@ -1564,7 +1555,6 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -extern int mod_reg_security(const char *name, struct security_operations *ops); extern struct dentry *securityfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.1 From 7e9db9eaefdb8798730790214ff1b7746006ec98 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:58:44 +0200 Subject: [S390] cio: Introduce modalias for css bus. Add modalias and subchannel type attributes for all subchannels. I/O subchannel specific attributes are now created in io_subchannel_probe(). modalias and subchannel type are also added to the uevent for the css bus. Also make the css modalias known. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 69b2342d5ebb..1fd03e732e07 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -159,6 +159,15 @@ struct ap_device_id { #define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 +/* s390 css bus devices (subchannels) */ +struct css_device_id { + __u8 type; /* subchannel type */ + __u8 pad1; + __u16 pad2; + __u32 pad3; + kernel_ulong_t driver_data; +}; + #define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ /* to workaround crosscompile issues */ -- cgit v1.2.1 From f08adc008d84f6b03d377ede951e29ed169e76e2 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:59:03 +0200 Subject: [S390] css: Use css_device_id for bus matching. css_device_id exists, so use it for determining the right driver (and add a match_flags which is always 1 for valid types). Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 1fd03e732e07..c4db5827963d 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -161,8 +161,8 @@ struct ap_device_id { /* s390 css bus devices (subchannels) */ struct css_device_id { + __u8 match_flags; __u8 type; /* subchannel type */ - __u8 pad1; __u16 pad2; __u32 pad3; kernel_ulong_t driver_data; -- cgit v1.2.1