diff options
Diffstat (limited to 'include/linux')
45 files changed, 1332 insertions, 161 deletions
diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 7ac8303c8471..e3ffd14a3f0b 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -47,6 +47,7 @@ header-y += coda_psdev.h header-y += coff.h header-y += comstats.h header-y += const.h +header-y += cgroupstats.h header-y += cycx_cfm.h header-y += dlm_device.h header-y += dlm_netlink.h diff --git a/include/linux/acpi.h b/include/linux/acpi.h index bf5e0009de75..8ccedf7a0a5a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -189,32 +189,6 @@ extern int ec_transaction(u8 command, extern int acpi_blacklisted(void); extern void acpi_bios_year(char *s); -#define ACPI_CSTATE_LIMIT_DEFINED /* for driver builds */ -#ifdef CONFIG_ACPI - -/* - * Set highest legal C-state - * 0: C0 okay, but not C1 - * 1: C1 okay, but not C2 - * 2: C2 okay, but not C3 etc. - */ - -extern unsigned int max_cstate; - -static inline unsigned int acpi_get_cstate_limit(void) -{ - return max_cstate; -} -static inline void acpi_set_cstate_limit(unsigned int new_limit) -{ - max_cstate = new_limit; - return; -} -#else -static inline unsigned int acpi_get_cstate_limit(void) { return 0; } -static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; } -#endif - #ifdef CONFIG_ACPI_NUMA int acpi_get_pxm(acpi_handle handle); int acpi_get_node(acpi_handle *handle); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 64b4641904fe..acad1105d942 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/bitops.h> #include <linux/string.h> +#include <linux/kernel.h> /* * bitmaps provide bit arrays that consume one or more unsigned diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b9fb8ee3308b..69c1edb9fe54 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -2,6 +2,14 @@ #define _LINUX_BITOPS_H #include <asm/types.h> +#ifdef __KERNEL__ +#define BIT(nr) (1UL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#endif + /* * Include this here because some architectures need generic_ffs/fls in * scope diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h new file mode 100644 index 000000000000..87479328d46d --- /dev/null +++ b/include/linux/cgroup.h @@ -0,0 +1,327 @@ +#ifndef _LINUX_CGROUP_H +#define _LINUX_CGROUP_H +/* + * cgroup interface + * + * Copyright (C) 2003 BULL SA + * Copyright (C) 2004-2006 Silicon Graphics, Inc. + * + */ + +#include <linux/sched.h> +#include <linux/kref.h> +#include <linux/cpumask.h> +#include <linux/nodemask.h> +#include <linux/rcupdate.h> +#include <linux/cgroupstats.h> + +#ifdef CONFIG_CGROUPS + +struct cgroupfs_root; +struct cgroup_subsys; +struct inode; + +extern int cgroup_init_early(void); +extern int cgroup_init(void); +extern void cgroup_init_smp(void); +extern void cgroup_lock(void); +extern void cgroup_unlock(void); +extern void cgroup_fork(struct task_struct *p); +extern void cgroup_fork_callbacks(struct task_struct *p); +extern void cgroup_post_fork(struct task_struct *p); +extern void cgroup_exit(struct task_struct *p, int run_callbacks); +extern int cgroupstats_build(struct cgroupstats *stats, + struct dentry *dentry); + +extern struct file_operations proc_cgroup_operations; + +/* Define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _subsys_id, +enum cgroup_subsys_id { +#include <linux/cgroup_subsys.h> + CGROUP_SUBSYS_COUNT +}; +#undef SUBSYS + +/* Per-subsystem/per-cgroup state maintained by the system. */ +struct cgroup_subsys_state { + /* The cgroup that this subsystem is attached to. Useful + * for subsystems that want to know about the cgroup + * hierarchy structure */ + struct cgroup *cgroup; + + /* State maintained by the cgroup system to allow + * subsystems to be "busy". Should be accessed via css_get() + * and css_put() */ + + atomic_t refcnt; + + unsigned long flags; +}; + +/* bits in struct cgroup_subsys_state flags field */ +enum { + CSS_ROOT, /* This CSS is the root of the subsystem */ +}; + +/* + * Call css_get() to hold a reference on the cgroup; + * + */ + +static inline void css_get(struct cgroup_subsys_state *css) +{ + /* We don't need to reference count the root state */ + if (!test_bit(CSS_ROOT, &css->flags)) + atomic_inc(&css->refcnt); +} +/* + * css_put() should be called to release a reference taken by + * css_get() + */ + +extern void __css_put(struct cgroup_subsys_state *css); +static inline void css_put(struct cgroup_subsys_state *css) +{ + if (!test_bit(CSS_ROOT, &css->flags)) + __css_put(css); +} + +struct cgroup { + unsigned long flags; /* "unsigned long" so bitops work */ + + /* count users of this cgroup. >0 means busy, but doesn't + * necessarily indicate the number of tasks in the + * cgroup */ + atomic_t count; + + /* + * We link our 'sibling' struct into our parent's 'children'. + * Our children link their 'sibling' into our 'children'. + */ + struct list_head sibling; /* my parent's children */ + struct list_head children; /* my children */ + + struct cgroup *parent; /* my parent */ + struct dentry *dentry; /* cgroup fs entry */ + + /* Private pointers for each registered subsystem */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + struct cgroupfs_root *root; + struct cgroup *top_cgroup; + + /* + * List of cg_cgroup_links pointing at css_sets with + * tasks in this cgroup. Protected by css_set_lock + */ + struct list_head css_sets; + + /* + * Linked list running through all cgroups that can + * potentially be reaped by the release agent. Protected by + * release_list_lock + */ + struct list_head release_list; +}; + +/* A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire + * cgroup set for a task. + */ + +struct css_set { + + /* Reference count */ + struct kref ref; + + /* + * List running through all cgroup groups. Protected by + * css_set_lock + */ + struct list_head list; + + /* + * List running through all tasks using this cgroup + * group. Protected by css_set_lock + */ + struct list_head tasks; + + /* + * List of cg_cgroup_link objects on link chains from + * cgroups referenced from this css_set. Protected by + * css_set_lock + */ + struct list_head cg_links; + + /* + * Set of subsystem states, one for each subsystem. This array + * is immutable after creation apart from the init_css_set + * during subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + +}; + +/* struct cftype: + * + * The files in the cgroup filesystem mostly have a very simple read/write + * handling, some common function will take care of it. Nevertheless some cases + * (read tasks) are special and therefore I define this structure for every + * kind of file. + * + * + * When reading/writing to a file: + * - the cgroup to use in file->f_dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_dentry->d_fsdata + */ + +#define MAX_CFTYPE_NAME 64 +struct cftype { + /* By convention, the name should begin with the name of the + * subsystem, followed by a period */ + char name[MAX_CFTYPE_NAME]; + int private; + int (*open) (struct inode *inode, struct file *file); + ssize_t (*read) (struct cgroup *cont, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos); + /* + * read_uint() is a shortcut for the common case of returning a + * single integer. Use it in place of read() + */ + u64 (*read_uint) (struct cgroup *cont, struct cftype *cft); + ssize_t (*write) (struct cgroup *cont, struct cftype *cft, + struct file *file, + const char __user *buf, size_t nbytes, loff_t *ppos); + + /* + * write_uint() is a shortcut for the common case of accepting + * a single integer (as parsed by simple_strtoull) from + * userspace. Use in place of write(); return 0 or error. + */ + int (*write_uint) (struct cgroup *cont, struct cftype *cft, u64 val); + + int (*release) (struct inode *inode, struct file *file); +}; + +/* Add a new file to the given cgroup directory. Should only be + * called by subsystems from within a populate() method */ +int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys, + const struct cftype *cft); + +/* Add a set of new files to the given cgroup directory. Should + * only be called by subsystems from within a populate() method */ +int cgroup_add_files(struct cgroup *cont, + struct cgroup_subsys *subsys, + const struct cftype cft[], + int count); + +int cgroup_is_removed(const struct cgroup *cont); + +int cgroup_path(const struct cgroup *cont, char *buf, int buflen); + +int cgroup_task_count(const struct cgroup *cont); + +/* Return true if the cgroup is a descendant of the current cgroup */ +int cgroup_is_descendant(const struct cgroup *cont); + +/* Control Group subsystem type. See Documentation/cgroups.txt for details */ + +struct cgroup_subsys { + struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, + struct cgroup *cont); + void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont); + int (*can_attach)(struct cgroup_subsys *ss, + struct cgroup *cont, struct task_struct *tsk); + void (*attach)(struct cgroup_subsys *ss, struct cgroup *cont, + struct cgroup *old_cont, struct task_struct *tsk); + void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); + void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); + int (*populate)(struct cgroup_subsys *ss, + struct cgroup *cont); + void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont); + void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + int subsys_id; + int active; + int early_init; +#define MAX_CGROUP_TYPE_NAMELEN 32 + const char *name; + + /* Protected by RCU */ + struct cgroupfs_root *root; + + struct list_head sibling; + + void *private; +}; + +#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; +#include <linux/cgroup_subsys.h> +#undef SUBSYS + +static inline struct cgroup_subsys_state *cgroup_subsys_state( + struct cgroup *cont, int subsys_id) +{ + return cont->subsys[subsys_id]; +} + +static inline struct cgroup_subsys_state *task_subsys_state( + struct task_struct *task, int subsys_id) +{ + return rcu_dereference(task->cgroups->subsys[subsys_id]); +} + +static inline struct cgroup* task_cgroup(struct task_struct *task, + int subsys_id) +{ + return task_subsys_state(task, subsys_id)->cgroup; +} + +int cgroup_path(const struct cgroup *cont, char *buf, int buflen); + +int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); + +/* A cgroup_iter should be treated as an opaque object */ +struct cgroup_iter { + struct list_head *cg_link; + struct list_head *task; +}; + +/* To iterate across the tasks in a cgroup: + * + * 1) call cgroup_iter_start to intialize an iterator + * + * 2) call cgroup_iter_next() to retrieve member tasks until it + * returns NULL or until you want to end the iteration + * + * 3) call cgroup_iter_end() to destroy the iterator. + */ +void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it); +struct task_struct *cgroup_iter_next(struct cgroup *cont, + struct cgroup_iter *it); +void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it); + +#else /* !CONFIG_CGROUPS */ + +static inline int cgroup_init_early(void) { return 0; } +static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_smp(void) {} +static inline void cgroup_fork(struct task_struct *p) {} +static inline void cgroup_fork_callbacks(struct task_struct *p) {} +static inline void cgroup_post_fork(struct task_struct *p) {} +static inline void cgroup_exit(struct task_struct *p, int callbacks) {} + +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} +static inline int cgroupstats_build(struct cgroupstats *stats, + struct dentry *dentry) +{ + return -EINVAL; +} + +#endif /* !CONFIG_CGROUPS */ + +#endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h new file mode 100644 index 000000000000..0b9bfbde8168 --- /dev/null +++ b/include/linux/cgroup_subsys.h @@ -0,0 +1,38 @@ +/* Add subsystem definitions of the form SUBSYS(<name>) in this + * file. Surround each one by a line of comment markers so that + * patches don't collide + */ + +/* */ + +/* */ + +#ifdef CONFIG_CPUSETS +SUBSYS(cpuset) +#endif + +/* */ + +#ifdef CONFIG_CGROUP_CPUACCT +SUBSYS(cpuacct) +#endif + +/* */ + +#ifdef CONFIG_CGROUP_DEBUG +SUBSYS(debug) +#endif + +/* */ + +#ifdef CONFIG_CGROUP_NS +SUBSYS(ns) +#endif + +/* */ + +#ifdef CONFIG_FAIR_CGROUP_SCHED +SUBSYS(cpu_cgroup) +#endif + +/* */ diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h new file mode 100644 index 000000000000..4f53abf6855d --- /dev/null +++ b/include/linux/cgroupstats.h @@ -0,0 +1,70 @@ +/* cgroupstats.h - exporting per-cgroup statistics + * + * Copyright IBM Corporation, 2007 + * Author Balbir Singh <balbir@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef _LINUX_CGROUPSTATS_H +#define _LINUX_CGROUPSTATS_H + +#include <linux/taskstats.h> + +/* + * Data shared between user space and kernel space on a per cgroup + * basis. This data is shared using taskstats. + * + * Most of these states are derived by looking at the task->state value + * For the nr_io_wait state, a flag in the delay accounting structure + * indicates that the task is waiting on IO + * + * Each member is aligned to a 8 byte boundary. + */ +struct cgroupstats { + __u64 nr_sleeping; /* Number of tasks sleeping */ + __u64 nr_running; /* Number of tasks running */ + __u64 nr_stopped; /* Number of tasks in stopped state */ + __u64 nr_uninterruptible; /* Number of tasks in uninterruptible */ + /* state */ + __u64 nr_io_wait; /* Number of tasks waiting on IO */ +}; + +/* + * Commands sent from userspace + * Not versioned. New commands should only be inserted at the enum's end + * prior to __CGROUPSTATS_CMD_MAX + */ + +enum { + CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX, /* Reserved */ + CGROUPSTATS_CMD_GET, /* user->kernel request/get-response */ + CGROUPSTATS_CMD_NEW, /* kernel->user event */ + __CGROUPSTATS_CMD_MAX, +}; + +#define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1) + +enum { + CGROUPSTATS_TYPE_UNSPEC = 0, /* Reserved */ + CGROUPSTATS_TYPE_CGROUP_STATS, /* contains name + stats */ + __CGROUPSTATS_TYPE_MAX, +}; + +#define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1) + +enum { + CGROUPSTATS_CMD_ATTR_UNSPEC = 0, + CGROUPSTATS_CMD_ATTR_FD, + __CGROUPSTATS_CMD_ATTR_MAX, +}; + +#define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1) + +#endif /* _LINUX_CGROUPSTATS_H */ diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h new file mode 100644 index 000000000000..6b5fd8a66c8d --- /dev/null +++ b/include/linux/cpu_acct.h @@ -0,0 +1,14 @@ + +#ifndef _LINUX_CPU_ACCT_H +#define _LINUX_CPU_ACCT_H + +#include <linux/cgroup.h> +#include <asm/cputime.h> + +#ifdef CONFIG_CGROUP_CPUACCT +extern void cpuacct_charge(struct task_struct *, cputime_t cputime); +#else +static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {} +#endif + +#endif diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h new file mode 100644 index 000000000000..16a51546db44 --- /dev/null +++ b/include/linux/cpuidle.h @@ -0,0 +1,180 @@ +/* + * cpuidle.h - a generic framework for CPU idle power management + * + * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * Shaohua Li <shaohua.li@intel.com> + * Adam Belay <abelay@novell.com> + * + * This code is licenced under the GPL. + */ + +#ifndef _LINUX_CPUIDLE_H +#define _LINUX_CPUIDLE_H + +#include <linux/percpu.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/kobject.h> +#include <linux/completion.h> + +#define CPUIDLE_STATE_MAX 8 +#define CPUIDLE_NAME_LEN 16 + +struct cpuidle_device; + + +/**************************** + * CPUIDLE DEVICE INTERFACE * + ****************************/ + +struct cpuidle_state { + char name[CPUIDLE_NAME_LEN]; + void *driver_data; + + unsigned int flags; + unsigned int exit_latency; /* in US */ + unsigned int power_usage; /* in mW */ + unsigned int target_residency; /* in US */ + + unsigned int usage; + unsigned int time; /* in US */ + + int (*enter) (struct cpuidle_device *dev, + struct cpuidle_state *state); +}; + +/* Idle State Flags */ +#define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ +#define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ +#define CPUIDLE_FLAG_SHALLOW (0x10) /* low latency, minimal savings */ +#define CPUIDLE_FLAG_BALANCED (0x20) /* medium latency, moderate savings */ +#define CPUIDLE_FLAG_DEEP (0x40) /* high latency, large savings */ + +#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) + +/** + * cpuidle_get_statedata - retrieves private driver state data + * @state: the state + */ +static inline void * cpuidle_get_statedata(struct cpuidle_state *state) +{ + return state->driver_data; +} + +/** + * cpuidle_set_statedata - stores private driver state data + * @state: the state + * @data: the private data + */ +static inline void +cpuidle_set_statedata(struct cpuidle_state *state, void *data) +{ + state->driver_data = data; +} + +struct cpuidle_state_kobj { + struct cpuidle_state *state; + struct completion kobj_unregister; + struct kobject kobj; +}; + +struct cpuidle_device { + int enabled:1; + unsigned int cpu; + + int last_residency; + int state_count; + struct cpuidle_state states[CPUIDLE_STATE_MAX]; + struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; + struct cpuidle_state *last_state; + + struct list_head device_list; + struct kobject kobj; + struct completion kobj_unregister; + void *governor_data; +}; + +DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); + +/** + * cpuidle_get_last_residency - retrieves the last state's residency time + * @dev: the target CPU + * + * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set + */ +static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) +{ + return dev->last_residency; +} + + +/**************************** + * CPUIDLE DRIVER INTERFACE * + ****************************/ + +struct cpuidle_driver { + char name[CPUIDLE_NAME_LEN]; + struct module *owner; +}; + +#ifdef CONFIG_CPU_IDLE + +extern int cpuidle_register_driver(struct cpuidle_driver *drv); +extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); +extern int cpuidle_register_device(struct cpuidle_device *dev); +extern void cpuidle_unregister_device(struct cpuidle_device *dev); + +extern void cpuidle_pause_and_lock(void); +extern void cpuidle_resume_and_unlock(void); +extern int cpuidle_enable_device(struct cpuidle_device *dev); +extern void cpuidle_disable_device(struct cpuidle_device *dev); + +#else + +static inline int cpuidle_register_driver(struct cpuidle_driver *drv) +{return 0;} +static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } +static inline int cpuidle_register_device(struct cpuidle_device *dev) +{return 0;} +static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { } + +static inline void cpuidle_pause_and_lock(void) { } +static inline void cpuidle_resume_and_unlock(void) { } +static inline int cpuidle_enable_device(struct cpuidle_device *dev) +{return 0;} +static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } + +#endif + +/****************************** + * CPUIDLE GOVERNOR INTERFACE * + ******************************/ + +struct cpuidle_governor { + char name[CPUIDLE_NAME_LEN]; + struct list_head governor_list; + unsigned int rating; + + int (*enable) (struct cpuidle_device *dev); + void (*disable) (struct cpuidle_device *dev); + + int (*select) (struct cpuidle_device *dev); + void (*reflect) (struct cpuidle_device *dev); + + struct module *owner; +}; + +#ifdef CONFIG_CPU_IDLE + +extern int cpuidle_register_governor(struct cpuidle_governor *gov); +extern void cpuidle_unregister_governor(struct cpuidle_governor *gov); + +#else + +static inline int cpuidle_register_governor(struct cpuidle_governor *gov) +{return 0;} +static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { } + +#endif + +#endif /* _LINUX_CPUIDLE_H */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index ea44d2e768a0..ecae585ec3da 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/cpumask.h> #include <linux/nodemask.h> +#include <linux/cgroup.h> #ifdef CONFIG_CPUSETS @@ -19,9 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_fork(struct task_struct *p); -extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); +extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -76,18 +76,22 @@ static inline int cpuset_do_slab_mem_spread(void) extern void cpuset_track_online_nodes(void); +extern int current_cpuset_is_being_rebound(void); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_fork(struct task_struct *p) {} -static inline void cpuset_exit(struct task_struct *p) {} static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) { return cpu_possible_map; } +static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p) +{ + return cpu_possible_map; +} static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) { @@ -148,6 +152,11 @@ static inline int cpuset_do_slab_mem_spread(void) static inline void cpuset_track_online_nodes(void) {} +static inline int current_cpuset_is_being_rebound(void) +{ + return 0; +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 55d1ca5e60f5..ab94bc083558 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -26,6 +26,7 @@ * Used to set current->delays->flags */ #define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ +#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT @@ -39,6 +40,14 @@ extern void __delayacct_blkio_end(void); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); +static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) +{ + if (p->delays) + return (p->delays->flags & DELAYACCT_PF_BLKIO); + else + return 0; +} + static inline void delayacct_set_flag(int flag) { if (current->delays) @@ -71,6 +80,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { + delayacct_set_flag(DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); } @@ -79,6 +89,7 @@ static inline void delayacct_blkio_end(void) { if (current->delays) __delayacct_blkio_end(); + delayacct_clear_flag(DELAYACCT_PF_BLKIO); } static inline int delayacct_add_tsk(struct taskstats *d, @@ -116,6 +127,8 @@ static inline int delayacct_add_tsk(struct taskstats *d, { return 0; } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } +static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a4d170ad9a5..1657e995f72c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -123,6 +123,7 @@ extern int dir_notify_enable; #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -1459,7 +1460,8 @@ void unnamed_dev_init(void); extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); -extern struct vfsmount *kern_mount(struct file_system_type *); +extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); +#define kern_mount(type) kern_mount_data(type, NULL) extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern void umount_tree(struct vfsmount *, int, struct list_head *); @@ -1922,6 +1924,8 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); +extern void get_filesystem(struct file_system_type *fs); +extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); extern struct super_block *user_get_super(dev_t); diff --git a/include/linux/hid.h b/include/linux/hid.h index edb8024d744b..6e35b92b1d2c 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -469,8 +469,8 @@ struct hid_device { /* device report descriptor */ /* handler for raw output data, used by hidraw */ int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t); #ifdef CONFIG_USB_HIDINPUT_POWERBOOK - unsigned long pb_pressed_fn[NBITS(KEY_MAX)]; - unsigned long pb_pressed_numlock[NBITS(KEY_MAX)]; + unsigned long pb_pressed_fn[BITS_TO_LONGS(KEY_CNT)]; + unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)]; #endif }; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d4b2f1c76e12..cae35b6b9aec 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -67,9 +67,6 @@ .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ - .pgrp = 0, \ - .tty_old_pgrp = NULL, \ - { .__session = 0}, \ } extern struct nsproxy init_nsproxy; @@ -94,15 +91,18 @@ extern struct group_info init_groups; #define INIT_STRUCT_PID { \ .count = ATOMIC_INIT(1), \ - .nr = 0, \ - /* Don't put this struct pid in pid_hash */ \ - .pid_chain = { .next = NULL, .pprev = NULL }, \ .tasks = { \ { .first = &init_task.pids[PIDTYPE_PID].node }, \ { .first = &init_task.pids[PIDTYPE_PGID].node }, \ { .first = &init_task.pids[PIDTYPE_SID].node }, \ }, \ .rcu = RCU_HEAD_INIT, \ + .level = 0, \ + .numbers = { { \ + .nr = 0, \ + .ns = &init_pid_ns, \ + .pid_chain = { .next = NULL, .pprev = NULL }, \ + }, } \ } #define INIT_PID_LINK(type) \ diff --git a/include/linux/input.h b/include/linux/input.h index f30da6fc08e3..62268929856c 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -98,6 +98,7 @@ struct input_absinfo { #define EV_PWR 0x16 #define EV_FF_STATUS 0x17 #define EV_MAX 0x1f +#define EV_CNT (EV_MAX+1) /* * Synchronization events. @@ -567,6 +568,7 @@ struct input_absinfo { /* We avoid low common keys in module aliases so they don't get huge. */ #define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x1ff +#define KEY_CNT (KEY_MAX+1) /* * Relative axes @@ -583,6 +585,7 @@ struct input_absinfo { #define REL_WHEEL 0x08 #define REL_MISC 0x09 #define REL_MAX 0x0f +#define REL_CNT (REL_MAX+1) /* * Absolute axes @@ -615,6 +618,7 @@ struct input_absinfo { #define ABS_VOLUME 0x20 #define ABS_MISC 0x28 #define ABS_MAX 0x3f +#define ABS_CNT (ABS_MAX+1) /* * Switch events @@ -625,6 +629,7 @@ struct input_absinfo { #define SW_HEADPHONE_INSERT 0x02 /* set = inserted */ #define SW_RADIO 0x03 /* set = radio enabled */ #define SW_MAX 0x0f +#define SW_CNT (SW_MAX+1) /* * Misc events @@ -636,6 +641,7 @@ struct input_absinfo { #define MSC_RAW 0x03 #define MSC_SCAN 0x04 #define MSC_MAX 0x07 +#define MSC_CNT (MSC_MAX+1) /* * LEDs @@ -653,6 +659,7 @@ struct input_absinfo { #define LED_MAIL 0x09 #define LED_CHARGING 0x0a #define LED_MAX 0x0f +#define LED_CNT (LED_MAX+1) /* * Autorepeat values @@ -670,6 +677,7 @@ struct input_absinfo { #define SND_BELL 0x01 #define SND_TONE 0x02 #define SND_MAX 0x07 +#define SND_CNT (SND_MAX+1) /* * IDs. @@ -920,6 +928,7 @@ struct ff_effect { #define FF_AUTOCENTER 0x61 #define FF_MAX 0x7f +#define FF_CNT (FF_MAX+1) #ifdef __KERNEL__ @@ -932,10 +941,6 @@ struct ff_effect { #include <linux/timer.h> #include <linux/mod_devicetable.h> -#define NBITS(x) (((x)/BITS_PER_LONG)+1) -#define BIT(x) (1UL<<((x)%BITS_PER_LONG)) -#define LONG(x) ((x)/BITS_PER_LONG) - /** * struct input_dev - represents an input device * @name: name of the device @@ -1005,28 +1010,30 @@ struct ff_effect { * @going_away: marks devices that are in a middle of unregistering and * causes input_open_device*() fail with -ENODEV. * @dev: driver model's view of this device + * @cdev: union for struct device pointer * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held * @node: used to place the device onto input_dev_list */ struct input_dev { - + /* private: */ void *private; /* do not use */ + /* public: */ const char *name; const char *phys; const char *uniq; struct input_id id; - unsigned long evbit[NBITS(EV_MAX)]; - unsigned long keybit[NBITS(KEY_MAX)]; - unsigned long relbit[NBITS(REL_MAX)]; - unsigned long absbit[NBITS(ABS_MAX)]; - unsigned long mscbit[NBITS(MSC_MAX)]; - unsigned long ledbit[NBITS(LED_MAX)]; - unsigned long sndbit[NBITS(SND_MAX)]; - unsigned long ffbit[NBITS(FF_MAX)]; - unsigned long swbit[NBITS(SW_MAX)]; + unsigned long evbit[BITS_TO_LONGS(EV_CNT)]; + unsigned long keybit[BITS_TO_LONGS(KEY_CNT)]; + unsigned long relbit[BITS_TO_LONGS(REL_CNT)]; + unsigned long absbit[BITS_TO_LONGS(ABS_CNT)]; + unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)]; + unsigned long ledbit[BITS_TO_LONGS(LED_CNT)]; + unsigned long sndbit[BITS_TO_LONGS(SND_CNT)]; + unsigned long ffbit[BITS_TO_LONGS(FF_CNT)]; + unsigned long swbit[BITS_TO_LONGS(SW_CNT)]; unsigned int keycodemax; unsigned int keycodesize; @@ -1044,10 +1051,10 @@ struct input_dev { int abs[ABS_MAX + 1]; int rep[REP_MAX + 1]; - unsigned long key[NBITS(KEY_MAX)]; - unsigned long led[NBITS(LED_MAX)]; - unsigned long snd[NBITS(SND_MAX)]; - unsigned long sw[NBITS(SW_MAX)]; + unsigned long key[BITS_TO_LONGS(KEY_CNT)]; + unsigned long led[BITS_TO_LONGS(LED_CNT)]; + unsigned long snd[BITS_TO_LONGS(SND_CNT)]; + unsigned long sw[BITS_TO_LONGS(SW_CNT)]; int absmax[ABS_MAX + 1]; int absmin[ABS_MAX + 1]; @@ -1291,7 +1298,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min dev->absfuzz[axis] = fuzz; dev->absflat[axis] = flat; - dev->absbit[LONG(axis)] |= BIT(axis); + dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis); } extern struct class input_class; @@ -1332,7 +1339,7 @@ struct ff_device { void *private; - unsigned long ffbit[NBITS(FF_MAX)]; + unsigned long ffbit[BITS_TO_LONGS(FF_CNT)]; struct mutex mutex; diff --git a/include/linux/ipc.h b/include/linux/ipc.h index ee111834091c..408696ea5189 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -89,6 +89,7 @@ struct kern_ipc_perm { spinlock_t lock; int deleted; + int id; key_t key; uid_t uid; gid_t gid; @@ -110,6 +111,8 @@ struct ipc_namespace { int msg_ctlmax; int msg_ctlmnb; int msg_ctlmni; + atomic_t msg_bytes; + atomic_t msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index a3abf51e488f..16e7ed855a18 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -58,7 +58,7 @@ * CONFIG_JBD_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING -extern int journal_enable_debug; +extern u8 journal_enable_debug; #define jbd_debug(n, f, a...) \ do { \ @@ -248,17 +248,7 @@ typedef struct journal_superblock_s #include <linux/fs.h> #include <linux/sched.h> -#define JBD_ASSERTIONS -#ifdef JBD_ASSERTIONS -#define J_ASSERT(assert) \ -do { \ - if (!(assert)) { \ - printk (KERN_EMERG \ - "Assertion failure in %s() at %s:%d: \"%s\"\n", \ - __FUNCTION__, __FILE__, __LINE__, # assert); \ - BUG(); \ - } \ -} while (0) +#define J_ASSERT(assert) BUG_ON(!(assert)) #if defined(CONFIG_BUFFER_DEBUG) void buffer_assertion_failure(struct buffer_head *bh); @@ -274,10 +264,6 @@ void buffer_assertion_failure(struct buffer_head *bh); #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) #endif -#else -#define J_ASSERT(assert) do { } while (0) -#endif /* JBD_ASSERTIONS */ - #if defined(JBD_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) J_ASSERT(expr) #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ad4b82ce84af..2d9c448d8c52 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -187,6 +187,8 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); #else /* !CONFIG_KEXEC */ struct pt_regs; diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h index 33b5c2e325b9..65c2d70853e9 100644 --- a/include/linux/keyboard.h +++ b/include/linux/keyboard.h @@ -23,10 +23,21 @@ #define MAX_NR_OF_USER_KEYMAPS 256 /* should be at least 7 */ #ifdef __KERNEL__ +struct notifier_block; extern const int NR_TYPES; extern const int max_vals[]; extern unsigned short *key_maps[MAX_NR_KEYMAPS]; extern unsigned short plain_map[NR_KEYS]; + +struct keyboard_notifier_param { + struct vc_data *vc; /* VC on which the keyboard press was done */ + int down; /* Pressure of the key? */ + int shift; /* Current shift mask */ + unsigned int value; /* keycode, unicode value or keysym */ +}; + +extern int register_keyboard_notifier(struct notifier_block *nb); +extern int unregister_keyboard_notifier(struct notifier_block *nb); #endif #define MAX_NR_FUNC 256 /* max nr of strings assigned to keys */ diff --git a/include/linux/list.h b/include/linux/list.h index b0cf0135fe3e..75ce2cb4ff6e 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -478,8 +478,7 @@ static inline void list_splice_init_rcu(struct list_head *list, pos = n, n = pos->next) /** - * list_for_each_prev_safe - iterate over a list backwards safe against removal - of list entry + * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f6279f68a827..4c4d236ded18 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -276,6 +276,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.key, sub) /* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + + +/* * Acquire a lock. * * Values for "read": diff --git a/include/linux/magic.h b/include/linux/magic.h index 722d4755060f..1fa0c2ce4dec 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -37,6 +37,7 @@ #define SMB_SUPER_MAGIC 0x517B #define USBDEVICE_SUPER_MAGIC 0x9fa2 +#define CGROUP_SUPER_MAGIC 0x27e0eb #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA diff --git a/include/linux/marker.h b/include/linux/marker.h new file mode 100644 index 000000000000..5f36cf946bcb --- /dev/null +++ b/include/linux/marker.h @@ -0,0 +1,129 @@ +#ifndef _LINUX_MARKER_H +#define _LINUX_MARKER_H + +/* + * Code markup for dynamic and static tracing. + * + * See Documentation/marker.txt. + * + * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#include <linux/types.h> + +struct module; +struct marker; + +/** + * marker_probe_func - Type of a marker probe function + * @mdata: pointer of type struct marker + * @private_data: caller site private data + * @fmt: format string + * @...: variable argument list + * + * Type of marker probe functions. They receive the mdata and need to parse the + * format string to recover the variable argument list. + */ +typedef void marker_probe_func(const struct marker *mdata, + void *private_data, const char *fmt, ...); + +struct marker { + const char *name; /* Marker name */ + const char *format; /* Marker format string, describing the + * variable argument list. + */ + char state; /* Marker state. */ + marker_probe_func *call;/* Probe handler function pointer */ + void *private; /* Private probe data */ +} __attribute__((aligned(8))); + +#ifdef CONFIG_MARKERS + +/* + * Note : the empty asm volatile with read constraint is used here instead of a + * "used" attribute to fix a gcc 4.1.x bug. + * Make sure the alignment of the structure in the __markers section will + * not add unwanted padding between the beginning of the section and the + * structure. Force alignment to the same alignment as the section start. + */ +#define __trace_mark(name, call_data, format, args...) \ + do { \ + static const char __mstrtab_name_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name; \ + static const char __mstrtab_format_##name[] \ + __attribute__((section("__markers_strings"))) \ + = format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_name_##name, __mstrtab_format_##name, \ + 0, __mark_empty_function, NULL }; \ + __mark_check_format(format, ## args); \ + if (unlikely(__mark_##name.state)) { \ + preempt_disable(); \ + (*__mark_##name.call) \ + (&__mark_##name, call_data, \ + format, ## args); \ + preempt_enable(); \ + } \ + } while (0) + +extern void marker_update_probe_range(struct marker *begin, + struct marker *end, struct module *probe_module, int *refcount); +#else /* !CONFIG_MARKERS */ +#define __trace_mark(name, call_data, format, args...) \ + __mark_check_format(format, ## args) +static inline void marker_update_probe_range(struct marker *begin, + struct marker *end, struct module *probe_module, int *refcount) +{ } +#endif /* CONFIG_MARKERS */ + +/** + * trace_mark - Marker + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker. + */ +#define trace_mark(name, format, args...) \ + __trace_mark(name, NULL, format, ## args) + +#define MARK_MAX_FORMAT_LEN 1024 + +/** + * MARK_NOARGS - Format string for a marker with no argument. + */ +#define MARK_NOARGS " " + +/* To be used for string format validity checking with gcc */ +static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) +{ +} + +extern marker_probe_func __mark_empty_function; + +/* + * Connect a probe to a marker. + * private data pointer must be a valid allocated memory address, or NULL. + */ +extern int marker_probe_register(const char *name, const char *format, + marker_probe_func *probe, void *private); + +/* + * Returns the private data given to marker_probe_register. + */ +extern void *marker_probe_unregister(const char *name); +/* + * Unregister a marker by providing the registered private data. + */ +extern void *marker_probe_unregister_private_data(void *private); + +extern int marker_arm(const char *name); +extern int marker_disarm(const char *name); +extern void *marker_get_private_data(const char *name); + +#endif diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 38c04d61ee06..59c4865bc85f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -148,14 +148,6 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); -#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) - -#ifdef CONFIG_CPUSETS -#define current_cpuset_is_being_rebound() \ - (cpuset_being_rebound == current->cpuset) -#else -#define current_cpuset_is_being_rebound() 0 -#endif extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, @@ -173,8 +165,6 @@ static inline void check_highest_zone(enum zone_type k) int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); -extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */ - #else struct mempolicy {}; @@ -248,8 +238,6 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) { } -#define set_cpuset_being_rebound(x) do {} while (0) - static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol) { diff --git a/include/linux/module.h b/include/linux/module.h index 642f325e4917..2cbc0b87e329 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,6 +15,7 @@ #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> +#include <linux/marker.h> #include <asm/local.h> #include <asm/module.h> @@ -354,6 +355,10 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; +#ifdef CONFIG_MARKERS + struct marker *markers; + unsigned int num_markers; +#endif }; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} @@ -457,6 +462,8 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); +extern void module_update_markers(struct module *probe_module, int *refcount); + #else /* !CONFIG_MODULES... */ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) @@ -556,6 +563,11 @@ static inline void print_modules(void) { } +static inline void module_update_markers(struct module *probe_module, + int *refcount) +{ +} + #endif /* CONFIG_MODULES */ struct device_driver; diff --git a/include/linux/msg.h b/include/linux/msg.h index f1b60740d641..10a3d5a1abff 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -77,7 +77,6 @@ struct msg_msg { /* one msq_queue structure for each present queue on the system */ struct msg_queue { struct kern_ipc_perm q_perm; - int q_id; time_t q_stime; /* last msgsnd time */ time_t q_rtime; /* last msgrcv time */ time_t q_ctime; /* last change time */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c5164c257f71..e82a6ebc725d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; + /* Number of in-flight sillydelete RPC calls */ + atomic_t silly_count; + /* List of deferred sillydelete requests */ + struct hlist_head silly_list; + wait_queue_head_t waitqueue; + #ifdef CONFIG_NFS_V4 struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -394,6 +400,8 @@ extern void nfs_release_automount_timer(void); */ extern int nfs_async_unlink(struct inode *dir, struct dentry *dentry); extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); +extern void nfs_block_sillyrename(struct dentry *dentry); +extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/notifier.h b/include/linux/notifier.h index fad7ff17e468..0c40cc0b4a36 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -231,5 +231,22 @@ static inline int notifier_to_errno(int ret) #define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ #define PM_POST_SUSPEND 0x0004 /* Suspend finished */ +/* Console keyboard events. + * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and + * KBD_KEYSYM. */ +#define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */ +#define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */ +#define KBD_UNICODE 0x0003 /* Keyboard unicode */ +#define KBD_KEYSYM 0x0004 /* Keyboard keysym */ +#define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ + +extern struct blocking_notifier_head reboot_notifier_list; + +/* Virtual Terminal events. */ +#define VT_ALLOCATE 0x0001 /* Console got allocated */ +#define VT_DEALLOCATE 0x0002 /* Console will be deallocated */ +#define VT_WRITE 0x0003 /* A char got output */ +#define VT_UPDATE 0x0004 /* A bigger update occurred */ + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 033a648709b6..0e66b57631fc 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -32,8 +32,39 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; +/* + * the namespaces access rules are: + * + * 1. only current task is allowed to change tsk->nsproxy pointer or + * any pointer on the nsproxy itself + * + * 2. when accessing (i.e. reading) current task's namespaces - no + * precautions should be taken - just dereference the pointers + * + * 3. the access to other task namespaces is performed like this + * rcu_read_lock(); + * nsproxy = task_nsproxy(tsk); + * if (nsproxy != NULL) { + * / * + * * work with the namespaces here + * * e.g. get the reference on one of them + * * / + * } / * + * * NULL task_nsproxy() means that this task is + * * almost dead (zombie) + * * / + * rcu_read_unlock(); + * + */ + +static inline struct nsproxy *task_nsproxy(struct task_struct *tsk) +{ + return rcu_dereference(tsk->nsproxy); +} + int copy_namespaces(unsigned long flags, struct task_struct *tsk); -void get_task_namespaces(struct task_struct *tsk); +void exit_task_namespaces(struct task_struct *tsk); +void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct fs_struct *); @@ -45,14 +76,15 @@ static inline void put_nsproxy(struct nsproxy *ns) } } -static inline void exit_task_namespaces(struct task_struct *p) +static inline void get_nsproxy(struct nsproxy *ns) { - struct nsproxy *ns = p->nsproxy; - if (ns) { - task_lock(p); - p->nsproxy = NULL; - task_unlock(p); - put_nsproxy(ns); - } + atomic_inc(&ns->count); } + +#ifdef CONFIG_CGROUP_NS +int ns_cgroup_clone(struct task_struct *tsk); +#else +static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; } +#endif + #endif diff --git a/include/linux/of.h b/include/linux/of.h index 6df80e985914..5c39b9270ff7 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -16,8 +16,8 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/types.h> +#include <linux/bitops.h> -#include <asm/bitops.h> #include <asm/prom.h> /* flag descriptions */ diff --git a/include/linux/phantom.h b/include/linux/phantom.h index d3ebbfae6903..96f4048a6cc3 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -30,7 +30,11 @@ struct phm_regs { #define PHN_SET_REG _IOW (PH_IOC_MAGIC, 1, struct phm_reg *) #define PHN_GET_REGS _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *) #define PHN_SET_REGS _IOW (PH_IOC_MAGIC, 3, struct phm_regs *) -#define PH_IOC_MAXNR 3 +/* this ioctl tells the driver, that the caller is not OpenHaptics and might + * use improved registers update (no more phantom switchoffs when using + * libphantom) */ +#define PHN_NOT_OH _IO (PH_IOC_MAGIC, 4) +#define PH_IOC_MAXNR 4 #define PHN_CONTROL 0x6 /* control byte in iaddr space */ #define PHN_CTL_AMP 0x1 /* switch after torques change */ diff --git a/include/linux/pid.h b/include/linux/pid.h index 1e0e4e3423a6..e29a900a8499 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -40,15 +40,28 @@ enum pid_type * processes. */ -struct pid -{ - atomic_t count; + +/* + * struct upid is used to get the id of the struct pid, as it is + * seen in particular namespace. Later the struct pid is found with + * find_pid_ns() using the int nr and struct pid_namespace *ns. + */ + +struct upid { /* Try to keep pid_chain in the same cacheline as nr for find_pid */ int nr; + struct pid_namespace *ns; struct hlist_node pid_chain; +}; + +struct pid +{ + atomic_t count; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct rcu_head rcu; + int level; + struct upid numbers[1]; }; extern struct pid init_struct_pid; @@ -83,26 +96,60 @@ extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); extern void FASTCALL(transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type)); +struct pid_namespace; +extern struct pid_namespace init_pid_ns; + /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. + * + * find_pid_ns() finds the pid in the namespace specified + * find_pid() find the pid by its global id, i.e. in the init namespace + * find_vpid() finr the pid by its virtual id, i.e. in the current namespace + * + * see also find_task_by_pid() set in include/linux/sched.h */ -extern struct pid *FASTCALL(find_pid(int nr)); +extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns)); +extern struct pid *find_vpid(int nr); +extern struct pid *find_pid(int nr); /* * Lookup a PID in the hash table, and return with it's count elevated. */ extern struct pid *find_get_pid(int nr); -extern struct pid *find_ge_pid(int nr); +extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -extern struct pid *alloc_pid(void); +extern struct pid *alloc_pid(struct pid_namespace *ns); extern void FASTCALL(free_pid(struct pid *pid)); +extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); + +/* + * the helpers to get the pid's id seen from different namespaces + * + * pid_nr() : global id, i.e. the id seen from the init namespace; + * pid_vnr() : virtual id, i.e. the id seen from the namespace this pid + * belongs to. this only makes sence when called in the + * context of the task that belongs to the same namespace; + * pid_nr_ns() : id seen from the ns specified. + * + * see also task_xid_nr() etc in include/linux/sched.h + */ static inline pid_t pid_nr(struct pid *pid) { pid_t nr = 0; if (pid) - nr = pid->nr; + nr = pid->numbers[0].nr; + return nr; +} + +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns); + +static inline pid_t pid_vnr(struct pid *pid) +{ + pid_t nr = 0; + if (pid) + nr = pid->numbers[pid->level].nr; return nr; } diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index b9a17e08ff0f..0135c76c76c6 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -4,7 +4,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/threads.h> -#include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/kref.h> @@ -20,13 +19,21 @@ struct pid_namespace { struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; struct task_struct *child_reaper; + struct kmem_cache *pid_cachep; + int level; + struct pid_namespace *parent; +#ifdef CONFIG_PROC_FS + struct vfsmount *proc_mnt; +#endif }; extern struct pid_namespace init_pid_ns; -static inline void get_pid_ns(struct pid_namespace *ns) +static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { - kref_get(&ns->kref); + if (ns != &init_pid_ns) + kref_get(&ns->kref); + return ns; } extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); @@ -34,12 +41,19 @@ extern void free_pid_ns(struct kref *kref); static inline void put_pid_ns(struct pid_namespace *ns) { - kref_put(&ns->kref, free_pid_ns); + if (ns != &init_pid_ns) + kref_put(&ns->kref, free_pid_ns); } -static inline struct task_struct *child_reaper(struct task_struct *tsk) +static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { - return init_pid_ns.child_reaper; + return tsk->nsproxy->pid_ns; +} + +static inline struct task_struct *task_child_reaper(struct task_struct *tsk) +{ + BUG_ON(tsk != current); + return tsk->nsproxy->pid_ns->child_reaper; } #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h new file mode 100644 index 000000000000..08094350f26a --- /dev/null +++ b/include/linux/prio_heap.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_PRIO_HEAP_H +#define _LINUX_PRIO_HEAP_H + +/* + * Simple insertion-only static-sized priority heap containing + * pointers, based on CLR, chapter 7 + */ + +#include <linux/gfp.h> + +/** + * struct ptr_heap - simple static-sized priority heap + * @ptrs - pointer to data area + * @max - max number of elements that can be stored in @ptrs + * @size - current number of valid elements in @ptrs (in the range 0..@size-1 + * @gt: comparison operator, which should implement "greater than" + */ +struct ptr_heap { + void **ptrs; + int max; + int size; + int (*gt)(void *, void *); +}; + +/** + * heap_init - initialize an empty heap with a given memory size + * @heap: the heap structure to be initialized + * @size: amount of memory to use in bytes + * @gfp_mask: mask to pass to kmalloc() + * @gt: comparison operator, which should implement "greater than" + */ +extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask, + int (*gt)(void *, void *)); + +/** + * heap_free - release a heap's storage + * @heap: the heap structure whose data should be released + */ +void heap_free(struct ptr_heap *heap); + +/** + * heap_insert - insert a value into the heap and return any overflowed value + * @heap: the heap to be operated on + * @p: the pointer to be inserted + * + * Attempts to insert the given value into the priority heap. If the + * heap is full prior to the insertion, then the resulting heap will + * consist of the smallest @max elements of the original heap and the + * new element; the greatest element will be removed from the heap and + * returned. Note that the returned element will be the new element + * (i.e. no change to the heap) if the new element is greater than all + * elements currently in the heap. + */ +extern void *heap_insert(struct ptr_heap *heap, void *p); + + + +#endif /* _LINUX_PRIO_HEAP_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 20741f668f7b..1ff461672060 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); extern struct vfsmount *proc_mnt; -extern int proc_fill_super(struct super_block *,void *,int); +struct pid_namespace; +extern int proc_fill_super(struct super_block *); extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); /* @@ -142,6 +143,9 @@ extern const struct file_operations proc_kcore_operations; extern const struct file_operations proc_kmsg_operations; extern const struct file_operations ppc_htab_operations; +extern int pid_ns_prepare_proc(struct pid_namespace *ns); +extern void pid_ns_release_proc(struct pid_namespace *ns); + /* * proc_tty.c */ @@ -207,7 +211,9 @@ extern void proc_net_remove(struct net *net, const char *name); #define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} -static inline void proc_flush_task(struct task_struct *task) { } +static inline void proc_flush_task(struct task_struct *task) +{ +} static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } @@ -232,6 +238,15 @@ static inline void proc_tty_unregister_driver(struct tty_driver *driver) {}; extern struct proc_dir_entry proc_root; +static inline int pid_ns_prepare_proc(struct pid_namespace *ns) +{ + return 0; +} + +static inline void pid_ns_release_proc(struct pid_namespace *ns) +{ +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 8dcf237d3386..72bfccd3da22 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -85,7 +85,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...); if( !( cond ) ) \ reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \ __FILE__ ":%i:%s: " format "\n", \ - in_interrupt() ? -1 : current -> pid, __LINE__ , __FUNCTION__ , ##args ) + in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args ) #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) @@ -283,6 +283,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) return sb->s_fs_info; } +/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 + * which overflows on large file systems. */ +static inline u32 reiserfs_bmap_count(struct super_block *sb) +{ + return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; +} + +static inline int bmap_would_wrap(unsigned bmap_nr) +{ + return bmap_nr > ((1LL << 16) - 1); +} + /** this says about version of key of all items (but stat data) the object consists of */ #define get_inode_item_key_version( inode ) \ @@ -1734,8 +1746,8 @@ int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, b_blocknr_t blocknr); int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); -int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, - int searchall, b_blocknr_t * next); +int reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr, + int bit_nr, int searchall, b_blocknr_t *next); int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long); int journal_join_abort(struct reiserfs_transaction_handle *, @@ -1743,7 +1755,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *, void reiserfs_journal_abort(struct super_block *sb, int errno); void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, - struct reiserfs_list_bitmap *, int); + struct reiserfs_list_bitmap *, unsigned int); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); @@ -2041,7 +2053,7 @@ struct buffer_head *get_FEB(struct tree_balance *); * arguments, such as node, search path, transaction_handle, etc. */ struct __reiserfs_blocknr_hint { struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ - long block; /* file offset, in blocks */ + sector_t block; /* file offset, in blocks */ struct in_core_key key; struct treepath *path; /* search path, used by allocator to deternine search_start by * various ways */ @@ -2099,7 +2111,8 @@ static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, - struct treepath *path, long block) + struct treepath *path, + sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, @@ -2116,7 +2129,8 @@ static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, - struct treepath *path, long block) + struct treepath *path, + sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index ff9e9234f8ba..10fa0c832018 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -265,9 +265,7 @@ enum journal_state_bits { typedef __u32(*hashf_t) (const signed char *, int); struct reiserfs_bitmap_info { - // FIXME: Won't work with block sizes > 8K - __u16 first_zero_hint; - __u16 free_count; + __u32 free_count; }; struct proc_dir_entry; diff --git a/include/linux/sched.h b/include/linux/sched.h index 10a83d8d5775..13df99fb2769 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -25,6 +25,7 @@ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#define CLONE_NEWPID 0x20000000 /* New pid namespace */ #define CLONE_NEWNET 0x40000000 /* New network namespace */ /* @@ -428,7 +429,17 @@ struct signal_struct { cputime_t it_prof_incr, it_virt_incr; /* job control IDs */ - pid_t pgrp; + + /* + * pgrp and session fields are deprecated. + * use the task_session_Xnr and task_pgrp_Xnr routines below + */ + + union { + pid_t pgrp __deprecated; + pid_t __pgrp; + }; + struct pid *tty_old_pgrp; union { @@ -736,6 +747,8 @@ struct sched_domain { #endif }; +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new); + #endif /* CONFIG_SMP */ /* @@ -756,8 +769,6 @@ static inline int above_background_load(void) } struct io_context; /* See blkdev.h */ -struct cpuset; - #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) struct group_info { @@ -1125,11 +1136,16 @@ struct task_struct { short il_next; #endif #ifdef CONFIG_CPUSETS - struct cpuset *cpuset; nodemask_t mems_allowed; int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif +#ifdef CONFIG_CGROUPS + /* Control Group info protected by css_set_lock */ + struct css_set *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock */ + struct list_head cg_list; +#endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT @@ -1185,24 +1201,14 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline pid_t process_group(struct task_struct *tsk) +static inline void set_task_session(struct task_struct *tsk, pid_t session) { - return tsk->signal->pgrp; + tsk->signal->__session = session; } -static inline pid_t signal_session(struct signal_struct *sig) +static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) { - return sig->__session; -} - -static inline pid_t process_session(struct task_struct *tsk) -{ - return signal_session(tsk->signal); -} - -static inline void set_signal_session(struct signal_struct *sig, pid_t session) -{ - sig->__session = session; + tsk->signal->__pgrp = pgrp; } static inline struct pid *task_pid(struct task_struct *task) @@ -1225,6 +1231,88 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } +struct pid_namespace; + +/* + * the helpers to get the task's different pids as they are seen + * from various namespaces + * + * task_xid_nr() : global id, i.e. the id seen from the init namespace; + * task_xid_vnr() : virtual id, i.e. the id seen from the namespace the task + * belongs to. this only makes sence when called in the + * context of the task that belongs to the same namespace; + * task_xid_nr_ns() : id seen from the ns specified; + * + * set_task_vxid() : assigns a virtual id to a task; + * + * task_ppid_nr_ns() : the parent's id as seen from the namespace specified. + * the result depends on the namespace and whether the + * task in question is the namespace's init. e.g. for the + * namespace's init this will return 0 when called from + * the namespace of this init, or appropriate id otherwise. + * + * + * see also pid_nr() etc in include/linux/pid.h + */ + +static inline pid_t task_pid_nr(struct task_struct *tsk) +{ + return tsk->pid; +} + +pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_pid_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_pid(tsk)); +} + + +static inline pid_t task_tgid_nr(struct task_struct *tsk) +{ + return tsk->tgid; +} + +pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_tgid(tsk)); +} + + +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return tsk->signal->__pgrp; +} + +pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_pgrp_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_pgrp(tsk)); +} + + +static inline pid_t task_session_nr(struct task_struct *tsk) +{ + return tsk->signal->__session; +} + +pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); + +static inline pid_t task_session_vnr(struct task_struct *tsk) +{ + return pid_vnr(task_session(tsk)); +} + + +static inline pid_t task_ppid_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns); +} + /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. @@ -1239,16 +1327,22 @@ static inline int pid_alive(struct task_struct *p) } /** - * is_init - check if a task structure is init + * is_global_init - check if a task structure is init * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. */ -static inline int is_init(struct task_struct *tsk) +static inline int is_global_init(struct task_struct *tsk) { return tsk->pid == 1; } +/* + * is_container_init: + * check whether in the task is init in its own pid namespace. + */ +extern int is_container_init(struct task_struct *tsk); + extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); @@ -1420,8 +1514,32 @@ extern struct task_struct init_task; extern struct mm_struct init_mm; -#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) -extern struct task_struct *find_task_by_pid_type(int type, int pid); +extern struct pid_namespace init_pid_ns; + +/* + * find a task by one of its numerical ids + * + * find_task_by_pid_type_ns(): + * it is the most generic call - it finds a task by all id, + * type and namespace specified + * find_task_by_pid_ns(): + * finds a task by its pid in the specified namespace + * find_task_by_vpid(): + * finds a task by its virtual pid + * find_task_by_pid(): + * finds a task by its global pid + * + * see also find_pid() etc in include/linux/pid.h + */ + +extern struct task_struct *find_task_by_pid_type_ns(int type, int pid, + struct pid_namespace *ns); + +extern struct task_struct *find_task_by_pid(pid_t nr); +extern struct task_struct *find_task_by_vpid(pid_t nr); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, + struct pid_namespace *ns); + extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. */ @@ -1608,6 +1726,12 @@ static inline int has_group_leader_pid(struct task_struct *p) return p->pid == p->tgid; } +static inline +int same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->tgid == p2->tgid; +} + static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry(rcu_dereference(p->thread_group.next), @@ -1625,7 +1749,8 @@ static inline int thread_group_empty(struct task_struct *p) /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset. + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/sem.h b/include/linux/sem.h index 9aaffb0b1d81..c8eaad9e4b72 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -90,7 +90,6 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ - int sem_id; time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ diff --git a/include/linux/shm.h b/include/linux/shm.h index bea65d9c93ef..eeaed921a1dc 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -79,7 +79,6 @@ struct shmid_kernel /* private to the kernel */ { struct kern_ipc_perm shm_perm; struct file * shm_file; - int id; unsigned long shm_nattch; unsigned long shm_segsz; time_t shm_atim; diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a7252e089b9..f4a1395e05ff 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -40,6 +40,7 @@ enum tick_nohz_mode { * @idle_sleeps: Number of idle calls, where the sched tick was stopped * @idle_entrytime: Time when the idle call was entered * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @sleep_length: Duration of the current idle sleep */ struct tick_sched { struct hrtimer sched_timer; @@ -52,6 +53,7 @@ struct tick_sched { unsigned long idle_sleeps; ktime_t idle_entrytime; ktime_t idle_sleeptime; + ktime_t sleep_length; unsigned long last_jiffies; unsigned long next_jiffies; ktime_t idle_expires; @@ -100,10 +102,17 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } extern void tick_nohz_stop_sched_tick(void); extern void tick_nohz_restart_sched_tick(void); extern void tick_nohz_update_jiffies(void); +extern ktime_t tick_nohz_get_sleep_length(void); # else static inline void tick_nohz_stop_sched_tick(void) { } static inline void tick_nohz_restart_sched_tick(void) { } static inline void tick_nohz_update_jiffies(void) { } +static inline ktime_t tick_nohz_get_sleep_length(void) +{ + ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; + + return len; +} # endif /* !NO_HZ */ #endif diff --git a/include/linux/types.h b/include/linux/types.h index 0351bf2fac85..4f0dad21c917 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -3,12 +3,9 @@ #ifdef __KERNEL__ -#define BITS_TO_LONGS(bits) \ - (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] -#define BITS_PER_BYTE 8 #endif #include <linux/posix_types.h> diff --git a/include/linux/uinput.h b/include/linux/uinput.h index a6c1e8eed226..15ddd4483b09 100644 --- a/include/linux/uinput.h +++ b/include/linux/uinput.h @@ -162,10 +162,6 @@ struct uinput_ff_erase { #define UI_FF_UPLOAD 1 #define UI_FF_ERASE 2 -#ifndef NBITS -#define NBITS(x) ((((x)-1)/(sizeof(long)*8))+1) -#endif /* NBITS */ - #define UINPUT_MAX_NAME_SIZE 80 struct uinput_user_dev { char name[UINPUT_MAX_NAME_SIZE]; diff --git a/include/linux/vt.h b/include/linux/vt.h index ba806e8711be..02c1c0288770 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -1,6 +1,18 @@ #ifndef _LINUX_VT_H #define _LINUX_VT_H +#ifdef __KERNEL__ +struct notifier_block; + +struct vt_notifier_param { + struct vc_data *vc; /* VC on which the update happened */ + unsigned int c; /* Printed char */ +}; + +extern int register_vt_notifier(struct notifier_block *nb); +extern int unregister_vt_notifier(struct notifier_block *nb); +#endif + /* * These constants are also useful for user-level apps (e.g., VC * resizing). diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ce6badc98f6d..7daafdc2514b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -8,6 +8,7 @@ #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/lockdep.h> #include <asm/atomic.h> struct workqueue_struct; @@ -28,6 +29,9 @@ struct work_struct { #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) @@ -41,10 +45,23 @@ struct execute_work { struct work_struct work; }; +#ifdef CONFIG_LOCKDEP +/* + * NB: because we have to copy the lockdep_map, setting _key + * here is required, otherwise it could get initialised to the + * copy of the lockdep_map! + */ +#define __WORK_INIT_LOCKDEP_MAP(n, k) \ + .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k), +#else +#define __WORK_INIT_LOCKDEP_MAP(n, k) +#endif + #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ + __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ @@ -76,12 +93,24 @@ struct execute_work { * assignment of the work data initializer allows the compiler * to generate better code. */ +#ifdef CONFIG_LOCKDEP +#define INIT_WORK(_work, _func) \ + do { \ + static struct lock_class_key __key; \ + \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ + lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\ + INIT_LIST_HEAD(&(_work)->entry); \ + PREPARE_WORK((_work), (_func)); \ + } while (0) +#else #define INIT_WORK(_work, _func) \ do { \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) +#endif #define INIT_DELAYED_WORK(_work, _func) \ do { \ @@ -118,9 +147,23 @@ struct execute_work { clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) -extern struct workqueue_struct *__create_workqueue(const char *name, - int singlethread, - int freezeable); +extern struct workqueue_struct * +__create_workqueue_key(const char *name, int singlethread, + int freezeable, struct lock_class_key *key); + +#ifdef CONFIG_LOCKDEP +#define __create_workqueue(name, singlethread, freezeable) \ +({ \ + static struct lock_class_key __key; \ + \ + __create_workqueue_key((name), (singlethread), \ + (freezeable), &__key); \ +}) +#else +#define __create_workqueue(name, singlethread, freezeable) \ + __create_workqueue_key((name), (singlethread), (freezeable), NULL) +#endif + #define create_workqueue(name) __create_workqueue((name), 0, 0) #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) |