diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 3 | ||||
-rw-r--r-- | kernel/audit.c | 24 | ||||
-rw-r--r-- | kernel/audit.h | 25 | ||||
-rw-r--r-- | kernel/auditfilter.c | 99 | ||||
-rw-r--r-- | kernel/auditsc.c | 74 | ||||
-rw-r--r-- | kernel/cgroup.c | 7 | ||||
-rw-r--r-- | kernel/compat.c | 15 | ||||
-rw-r--r-- | kernel/hrtimer.c | 22 | ||||
-rw-r--r-- | kernel/kgdb.c | 1700 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 30 | ||||
-rw-r--r-- | kernel/printk.c | 17 | ||||
-rw-r--r-- | kernel/sched_fair.c | 6 | ||||
-rw-r--r-- | kernel/semaphore.c | 264 | ||||
-rw-r--r-- | kernel/signal.c | 71 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 30 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 2 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 2 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 6 | ||||
-rw-r--r-- | kernel/timer.c | 16 | ||||
-rw-r--r-- | kernel/workqueue.c | 2 |
21 files changed, 2187 insertions, 232 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 6c584c55a6e9..6c5f081132a4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o nsproxy.o srcu.o \ + hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o obj-$(CONFIG_SYSCTL) += sysctl_check.o @@ -53,6 +53,7 @@ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_AUDIT_TREE) += audit_tree.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o diff --git a/kernel/audit.c b/kernel/audit.c index b782b046543d..a7b16086d36f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -21,7 +21,7 @@ * * Written by Rickard E. (Rik) Faith <faith@redhat.com> * - * Goals: 1) Integrate fully with SELinux. + * Goals: 1) Integrate fully with Security Modules. * 2) Minimal run-time overhead: * a) Minimal when syscall auditing is disabled (audit_enable=0). * b) Small when syscall auditing is enabled and no audit record @@ -55,7 +55,6 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/netlink.h> -#include <linux/selinux.h> #include <linux/inotify.h> #include <linux/freezer.h> #include <linux/tty.h> @@ -265,13 +264,13 @@ static int audit_log_config_change(char *function_name, int new, int old, char *ctx = NULL; u32 len; - rc = selinux_sid_to_string(sid, &ctx, &len); + rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) { audit_log_format(ab, " sid=%u", sid); allow_changes = 0; /* Something weird, deny request */ } else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); } } audit_log_format(ab, " res=%d", allow_changes); @@ -550,12 +549,13 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, audit_log_format(*ab, "user pid=%d uid=%u auid=%u", pid, uid, auid); if (sid) { - rc = selinux_sid_to_string(sid, &ctx, &len); + rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) audit_log_format(*ab, " ssid=%u", sid); - else + else { audit_log_format(*ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } return rc; @@ -758,18 +758,18 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } case AUDIT_SIGNAL_INFO: - err = selinux_sid_to_string(audit_sig_sid, &ctx, &len); + err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); if (!sig_data) { - kfree(ctx); + security_release_secctx(ctx, len); return -ENOMEM; } sig_data->uid = audit_sig_uid; sig_data->pid = audit_sig_pid; memcpy(sig_data->ctx, ctx, len); - kfree(ctx); + security_release_secctx(ctx, len); audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); @@ -881,10 +881,6 @@ static int __init audit_init(void) audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; - /* Register the callback with selinux. This callback will be invoked - * when a new policy is loaded. */ - selinux_audit_set_callback(&selinux_audit_rule_update); - audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); #ifdef CONFIG_AUDITSYSCALL diff --git a/kernel/audit.h b/kernel/audit.h index 2554bd524fd1..3cfc54ee3e1f 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -65,34 +65,9 @@ struct audit_watch { struct list_head rules; /* associated rules */ }; -struct audit_field { - u32 type; - u32 val; - u32 op; - char *se_str; - struct selinux_audit_rule *se_rule; -}; - struct audit_tree; struct audit_chunk; -struct audit_krule { - int vers_ops; - u32 flags; - u32 listnr; - u32 action; - u32 mask[AUDIT_BITMASK_SIZE]; - u32 buflen; /* for data alloc on list rules */ - u32 field_count; - char *filterkey; /* ties events to rules */ - struct audit_field *fields; - struct audit_field *arch_f; /* quick access to arch field */ - struct audit_field *inode_f; /* quick access to an inode field */ - struct audit_watch *watch; /* associated watch */ - struct audit_tree *tree; /* associated watched tree */ - struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ -}; - struct audit_entry { struct list_head list; struct rcu_head rcu; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 2f2914b7cc30..28fef6bf8534 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -28,7 +28,7 @@ #include <linux/netlink.h> #include <linux/sched.h> #include <linux/inotify.h> -#include <linux/selinux.h> +#include <linux/security.h> #include "audit.h" /* @@ -38,7 +38,7 @@ * Synchronizes writes and blocking reads of audit's filterlist * data. Rcu is used to traverse the filterlist and access * contents of structs audit_entry, audit_watch and opaque - * selinux rules during filtering. If modified, these structures + * LSM rules during filtering. If modified, these structures * must be copied and replace their counterparts in the filterlist. * An audit_parent struct is not accessed during filtering, so may * be written directly provided audit_filter_mutex is held. @@ -139,8 +139,8 @@ static inline void audit_free_rule(struct audit_entry *e) if (e->rule.fields) for (i = 0; i < e->rule.field_count; i++) { struct audit_field *f = &e->rule.fields[i]; - kfree(f->se_str); - selinux_audit_rule_free(f->se_rule); + kfree(f->lsm_str); + security_audit_rule_free(f->lsm_rule); } kfree(e->rule.fields); kfree(e->rule.filterkey); @@ -554,8 +554,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->op = data->fieldflags[i] & AUDIT_OPERATORS; f->type = data->fields[i]; f->val = data->values[i]; - f->se_str = NULL; - f->se_rule = NULL; + f->lsm_str = NULL; + f->lsm_rule = NULL; switch(f->type) { case AUDIT_PID: case AUDIT_UID: @@ -597,12 +597,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; entry->rule.buflen += f->val; - err = selinux_audit_rule_init(f->type, f->op, str, - &f->se_rule); + err = security_audit_rule_init(f->type, f->op, str, + (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { - printk(KERN_WARNING "audit rule for selinux " + printk(KERN_WARNING "audit rule for LSM " "\'%s\' is invalid\n", str); err = 0; } @@ -610,7 +610,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, kfree(str); goto exit_free; } else - f->se_str = str; + f->lsm_str = str; break; case AUDIT_WATCH: str = audit_unpack_string(&bufp, &remain, f->val); @@ -754,7 +754,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: data->buflen += data->values[i] = - audit_pack_string(&bufp, f->se_str); + audit_pack_string(&bufp, f->lsm_str); break; case AUDIT_WATCH: data->buflen += data->values[i] = @@ -806,7 +806,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) + if (strcmp(a->fields[i].lsm_str, b->fields[i].lsm_str)) return 1; break; case AUDIT_WATCH: @@ -862,28 +862,28 @@ out: return new; } -/* Duplicate selinux field information. The se_rule is opaque, so must be +/* Duplicate LSM field information. The lsm_rule is opaque, so must be * re-initialized. */ -static inline int audit_dupe_selinux_field(struct audit_field *df, +static inline int audit_dupe_lsm_field(struct audit_field *df, struct audit_field *sf) { int ret = 0; - char *se_str; + char *lsm_str; - /* our own copy of se_str */ - se_str = kstrdup(sf->se_str, GFP_KERNEL); - if (unlikely(!se_str)) + /* our own copy of lsm_str */ + lsm_str = kstrdup(sf->lsm_str, GFP_KERNEL); + if (unlikely(!lsm_str)) return -ENOMEM; - df->se_str = se_str; + df->lsm_str = lsm_str; - /* our own (refreshed) copy of se_rule */ - ret = selinux_audit_rule_init(df->type, df->op, df->se_str, - &df->se_rule); + /* our own (refreshed) copy of lsm_rule */ + ret = security_audit_rule_init(df->type, df->op, df->lsm_str, + (void **)&df->lsm_rule); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { - printk(KERN_WARNING "audit rule for selinux \'%s\' is " - "invalid\n", df->se_str); + printk(KERN_WARNING "audit rule for LSM \'%s\' is " + "invalid\n", df->lsm_str); ret = 0; } @@ -891,7 +891,7 @@ static inline int audit_dupe_selinux_field(struct audit_field *df, } /* Duplicate an audit rule. This will be a deep copy with the exception - * of the watch - that pointer is carried over. The selinux specific fields + * of the watch - that pointer is carried over. The LSM specific fields * will be updated in the copy. The point is to be able to replace the old * rule with the new rule in the filterlist, then free the old rule. * The rlist element is undefined; list manipulations are handled apart from @@ -930,7 +930,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old, new->tree = old->tree; memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); - /* deep copy this information, updating the se_rule fields, because + /* deep copy this information, updating the lsm_rule fields, because * the originals will all be freed when the old rule is freed. */ for (i = 0; i < fcount; i++) { switch (new->fields[i].type) { @@ -944,7 +944,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - err = audit_dupe_selinux_field(&new->fields[i], + err = audit_dupe_lsm_field(&new->fields[i], &old->fields[i]); break; case AUDIT_FILTERKEY: @@ -1515,11 +1515,12 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string(sid, &ctx, &len)) + if (security_secid_to_secctx(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); - else + else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_format(ab, " op=%s rule key=", action); if (rule->filterkey) @@ -1761,38 +1762,12 @@ unlock_and_return: return result; } -/* Check to see if the rule contains any selinux fields. Returns 1 if there - are selinux fields specified in the rule, 0 otherwise. */ -static inline int audit_rule_has_selinux(struct audit_krule *rule) -{ - int i; - - for (i = 0; i < rule->field_count; i++) { - struct audit_field *f = &rule->fields[i]; - switch (f->type) { - case AUDIT_SUBJ_USER: - case AUDIT_SUBJ_ROLE: - case AUDIT_SUBJ_TYPE: - case AUDIT_SUBJ_SEN: - case AUDIT_SUBJ_CLR: - case AUDIT_OBJ_USER: - case AUDIT_OBJ_ROLE: - case AUDIT_OBJ_TYPE: - case AUDIT_OBJ_LEV_LOW: - case AUDIT_OBJ_LEV_HIGH: - return 1; - } - } - - return 0; -} - -/* This function will re-initialize the se_rule field of all applicable rules. - * It will traverse the filter lists serarching for rules that contain selinux +/* This function will re-initialize the lsm_rule field of all applicable rules. + * It will traverse the filter lists serarching for rules that contain LSM * specific filter fields. When such a rule is found, it is copied, the - * selinux field is re-initialized, and the old rule is replaced with the + * LSM field is re-initialized, and the old rule is replaced with the * updated rule. */ -int selinux_audit_rule_update(void) +int audit_update_lsm_rules(void) { struct audit_entry *entry, *n, *nentry; struct audit_watch *watch; @@ -1804,7 +1779,7 @@ int selinux_audit_rule_update(void) for (i = 0; i < AUDIT_NR_FILTERS; i++) { list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { - if (!audit_rule_has_selinux(&entry->rule)) + if (!security_audit_rule_known(&entry->rule)) continue; watch = entry->rule.watch; @@ -1815,7 +1790,7 @@ int selinux_audit_rule_update(void) * return value */ if (!err) err = PTR_ERR(nentry); - audit_panic("error updating selinux filters"); + audit_panic("error updating LSM filters"); if (watch) list_del(&entry->rule.rlist); list_del_rcu(&entry->list); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 782262e4107d..56e56ed594a8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -61,7 +61,6 @@ #include <linux/security.h> #include <linux/list.h> #include <linux/tty.h> -#include <linux/selinux.h> #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/syscalls.h> @@ -528,14 +527,14 @@ static int audit_filter_rules(struct task_struct *tsk, match for now to avoid losing information that may be wanted. An error message will also be logged upon error */ - if (f->se_rule) { + if (f->lsm_rule) { if (need_sid) { - selinux_get_task_sid(tsk, &sid); + security_task_getsecid(tsk, &sid); need_sid = 0; } - result = selinux_audit_rule_match(sid, f->type, + result = security_audit_rule_match(sid, f->type, f->op, - f->se_rule, + f->lsm_rule, ctx); } break; @@ -546,18 +545,18 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_OBJ_LEV_HIGH: /* The above note for AUDIT_SUBJ_USER...AUDIT_SUBJ_CLR also applies here */ - if (f->se_rule) { + if (f->lsm_rule) { /* Find files that match */ if (name) { - result = selinux_audit_rule_match( + result = security_audit_rule_match( name->osid, f->type, f->op, - f->se_rule, ctx); + f->lsm_rule, ctx); } else if (ctx) { for (j = 0; j < ctx->name_count; j++) { - if (selinux_audit_rule_match( + if (security_audit_rule_match( ctx->names[j].osid, f->type, f->op, - f->se_rule, ctx)) { + f->lsm_rule, ctx)) { ++result; break; } @@ -570,7 +569,7 @@ static int audit_filter_rules(struct task_struct *tsk, aux = aux->next) { if (aux->type == AUDIT_IPC) { struct audit_aux_data_ipcctl *axi = (void *)aux; - if (selinux_audit_rule_match(axi->osid, f->type, f->op, f->se_rule, ctx)) { + if (security_audit_rule_match(axi->osid, f->type, f->op, f->lsm_rule, ctx)) { ++result; break; } @@ -885,11 +884,11 @@ void audit_log_task_context(struct audit_buffer *ab) int error; u32 sid; - selinux_get_task_sid(current, &sid); + security_task_getsecid(current, &sid); if (!sid) return; - error = selinux_sid_to_string(sid, &ctx, &len); + error = security_secid_to_secctx(sid, &ctx, &len); if (error) { if (error != -EINVAL) goto error_path; @@ -897,7 +896,7 @@ void audit_log_task_context(struct audit_buffer *ab) } audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); return; error_path: @@ -941,7 +940,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, u32 sid, char *comm) { struct audit_buffer *ab; - char *s = NULL; + char *ctx = NULL; u32 len; int rc = 0; @@ -951,15 +950,16 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid, uid, sessionid); - if (selinux_sid_to_string(sid, &s, &len)) { + if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); rc = 1; - } else - audit_log_format(ab, " obj=%s", s); + } else { + audit_log_format(ab, " obj=%s", ctx); + security_release_secctx(ctx, len); + } audit_log_format(ab, " ocomm="); audit_log_untrustedstring(ab, comm); audit_log_end(ab); - kfree(s); return rc; } @@ -1271,14 +1271,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string( + if (security_secid_to_secctx( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); call_panic = 1; - } else + } else { audit_log_format(ab, " obj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } break; } @@ -1392,13 +1393,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string( + if (security_secid_to_secctx( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; - } else + } else { audit_log_format(ab, " obj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_end(ab); @@ -1775,7 +1777,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode name->uid = inode->i_uid; name->gid = inode->i_gid; name->rdev = inode->i_rdev; - selinux_get_inode_sid(inode, &name->osid); + security_inode_getsecid(inode, &name->osid); } /** @@ -2190,8 +2192,7 @@ int __audit_ipc_obj(struct kern_ipc_perm *ipcp) ax->uid = ipcp->uid; ax->gid = ipcp->gid; ax->mode = ipcp->mode; - selinux_get_ipc_sid(ipcp, &ax->osid); - + security_ipc_getsecid(ipcp, &ax->osid); ax->d.type = AUDIT_IPC; ax->d.next = context->aux; context->aux = (void *)ax; @@ -2343,7 +2344,7 @@ void __audit_ptrace(struct task_struct *t) context->target_auid = audit_get_loginuid(t); context->target_uid = t->uid; context->target_sessionid = audit_get_sessionid(t); - selinux_get_task_sid(t, &context->target_sid); + security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); } @@ -2371,7 +2372,7 @@ int __audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = tsk->loginuid; else audit_sig_uid = tsk->uid; - selinux_get_task_sid(tsk, &audit_sig_sid); + security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) return 0; @@ -2384,7 +2385,7 @@ int __audit_signal_info(int sig, struct task_struct *t) ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t->uid; ctx->target_sessionid = audit_get_sessionid(t); - selinux_get_task_sid(t, &ctx->target_sid); + security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); return 0; } @@ -2405,7 +2406,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_auid[axp->pid_count] = audit_get_loginuid(t); axp->target_uid[axp->pid_count] = t->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); - selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]); + security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); axp->pid_count++; @@ -2435,16 +2436,17 @@ void audit_core_dumps(long signr) ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", auid, current->uid, current->gid, sessionid); - selinux_get_task_sid(current, &sid); + security_task_getsecid(current, &sid); if (sid) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string(sid, &ctx, &len)) + if (security_secid_to_secctx(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); - else + else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2727f9238359..6d8de051382b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1722,7 +1722,12 @@ void cgroup_enable_task_cg_lists(void) use_task_css_set_links = 1; do_each_thread(g, p) { task_lock(p); - if (list_empty(&p->cg_list)) + /* + * We should check if the process is exiting, otherwise + * it will race with cgroup_exit() in that the list + * entry won't be deleted though the process has exited. + */ + if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list)) list_add(&p->cg_list, &p->cgroups->tasks); task_unlock(p); } while_each_thread(g, p); diff --git a/kernel/compat.c b/kernel/compat.c index 5f0e201bcfd3..9c48abfcd4a5 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -47,15 +47,14 @@ static long compat_nanosleep_restart(struct restart_block *restart) mm_segment_t oldfs; long ret; - rmtp = (struct compat_timespec __user *)(restart->arg1); - restart->arg1 = (unsigned long)&rmt; + restart->nanosleep.rmtp = (struct timespec __user *) &rmt; oldfs = get_fs(); set_fs(KERNEL_DS); ret = hrtimer_nanosleep_restart(restart); set_fs(oldfs); if (ret) { - restart->arg1 = (unsigned long)rmtp; + rmtp = restart->nanosleep.compat_rmtp; if (rmtp && put_compat_timespec(&rmt, rmtp)) return -EFAULT; @@ -89,7 +88,7 @@ asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, = ¤t_thread_info()->restart_block; restart->fn = compat_nanosleep_restart; - restart->arg1 = (unsigned long)rmtp; + restart->nanosleep.compat_rmtp = rmtp; if (rmtp && put_compat_timespec(&rmt, rmtp)) return -EFAULT; @@ -607,9 +606,9 @@ static long compat_clock_nanosleep_restart(struct restart_block *restart) long err; mm_segment_t oldfs; struct timespec tu; - struct compat_timespec *rmtp = (struct compat_timespec *)(restart->arg1); + struct compat_timespec *rmtp = restart->nanosleep.compat_rmtp; - restart->arg1 = (unsigned long) &tu; + restart->nanosleep.rmtp = (struct timespec __user *) &tu; oldfs = get_fs(); set_fs(KERNEL_DS); err = clock_nanosleep_restart(restart); @@ -621,7 +620,7 @@ static long compat_clock_nanosleep_restart(struct restart_block *restart) if (err == -ERESTART_RESTARTBLOCK) { restart->fn = compat_clock_nanosleep_restart; - restart->arg1 = (unsigned long) rmtp; + restart->nanosleep.compat_rmtp = rmtp; } return err; } @@ -652,7 +651,7 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, if (err == -ERESTART_RESTARTBLOCK) { restart = ¤t_thread_info()->restart_block; restart->fn = compat_clock_nanosleep_restart; - restart->arg1 = (unsigned long) rmtp; + restart->nanosleep.compat_rmtp = rmtp; } return err; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 98bee013f71f..c642ef75069f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1354,13 +1354,13 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct hrtimer_sleeper t; struct timespec __user *rmtp; - hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; + hrtimer_init(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); + t.timer.expires.tv64 = restart->nanosleep.expires; if (do_nanosleep(&t, HRTIMER_MODE_ABS)) return 0; - rmtp = (struct timespec __user *)restart->arg1; + rmtp = restart->nanosleep.rmtp; if (rmtp) { int ret = update_rmtp(&t.timer, rmtp); if (ret <= 0) @@ -1394,10 +1394,9 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->arg0 = (unsigned long) t.timer.base->index; - restart->arg1 = (unsigned long) rmtp; - restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF; - restart->arg3 = t.timer.expires.tv64 >> 32; + restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.rmtp = rmtp; + restart->nanosleep.expires = t.timer.expires.tv64; return -ERESTART_RESTARTBLOCK; } @@ -1425,7 +1424,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) int i; spin_lock_init(&cpu_base->lock); - lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; @@ -1466,16 +1464,16 @@ static void migrate_hrtimers(int cpu) tick_cancel_sched_timer(cpu); local_irq_disable(); - double_spin_lock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_lock(&new_base->lock); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - double_spin_unlock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); local_irq_enable(); put_cpu_var(hrtimer_bases); } diff --git a/kernel/kgdb.c b/kernel/kgdb.c new file mode 100644 index 000000000000..1bd0ec1c80b2 --- /dev/null +++ b/kernel/kgdb.c @@ -0,0 +1,1700 @@ +/* + * KGDB stub. + * + * Maintainer: Jason Wessel <jason.wessel@windriver.com> + * + * Copyright (C) 2000-2001 VERITAS Software Corporation. + * Copyright (C) 2002-2004 Timesys Corporation + * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com> + * Copyright (C) 2004 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org> + * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. + * Copyright (C) 2005-2008 Wind River Systems, Inc. + * Copyright (C) 2007 MontaVista Software, Inc. + * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * Contributors at various stages not listed above: + * Jason Wessel ( jason.wessel@windriver.com ) + * George Anzinger <george@mvista.com> + * Anurekh Saxena (anurekh.saxena@timesys.com) + * Lake Stevens Instrument Division (Glenn Engel) + * Jim Kingdon, Cygnus Support. + * + * Original KGDB stub: David Grothe <dave@gcom.com>, + * Tigran Aivazian <tigran@sco.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ +#include <linux/pid_namespace.h> +#include <linux/clocksource.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/console.h> +#include <linux/threads.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/sysrq.h> +#include <linux/init.h> +#include <linux/kgdb.h> +#include <linux/pid.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm/cacheflush.h> +#include <asm/byteorder.h> +#include <asm/atomic.h> +#include <asm/system.h> + +static int kgdb_break_asap; + +struct kgdb_state { + int ex_vector; + int signo; + int err_code; + int cpu; + int pass_exception; + long threadid; + long kgdb_usethreadid; + struct pt_regs *linux_regs; +}; + +static struct debuggerinfo_struct { + void *debuggerinfo; + struct task_struct *task; +} kgdb_info[NR_CPUS]; + +/** + * kgdb_connected - Is a host GDB connected to us? + */ +int kgdb_connected; +EXPORT_SYMBOL_GPL(kgdb_connected); + +/* All the KGDB handlers are installed */ +static int kgdb_io_module_registered; + +/* Guard for recursive entry */ +static int exception_level; + +static struct kgdb_io *kgdb_io_ops; +static DEFINE_SPINLOCK(kgdb_registration_lock); + +/* kgdb console driver is loaded */ +static int kgdb_con_registered; +/* determine if kgdb console output should be used */ +static int kgdb_use_con; + +static int __init opt_kgdb_con(char *str) +{ + kgdb_use_con = 1; + return 0; +} + +early_param("kgdbcon", opt_kgdb_con); + +module_param(kgdb_use_con, int, 0644); + +/* + * Holds information about breakpoints in a kernel. These breakpoints are + * added and removed by gdb. + */ +static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { + [0 ... KGDB_MAX_BREAKPOINTS-1] = { .state = BP_UNDEFINED } +}; + +/* + * The CPU# of the active CPU, or -1 if none: + */ +atomic_t kgdb_active = ATOMIC_INIT(-1); + +/* + * We use NR_CPUs not PERCPU, in case kgdb is used to debug early + * bootup code (which might not have percpu set up yet): + */ +static atomic_t passive_cpu_wait[NR_CPUS]; +static atomic_t cpu_in_kgdb[NR_CPUS]; +atomic_t kgdb_setting_breakpoint; + +struct task_struct *kgdb_usethread; +struct task_struct *kgdb_contthread; + +int kgdb_single_step; + +/* Our I/O buffers. */ +static char remcom_in_buffer[BUFMAX]; +static char remcom_out_buffer[BUFMAX]; + +/* Storage for the registers, in GDB format. */ +static unsigned long gdb_regs[(NUMREGBYTES + + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; + +/* to keep track of the CPU which is doing the single stepping*/ +atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1); + +/* + * If you are debugging a problem where roundup (the collection of + * all other CPUs) is a problem [this should be extremely rare], + * then use the nokgdbroundup option to avoid roundup. In that case + * the other CPUs might interfere with your debugging context, so + * use this with care: + */ +int kgdb_do_roundup = 1; + +static int __init opt_nokgdbroundup(char *str) +{ + kgdb_do_roundup = 0; + + return 0; +} + +early_param("nokgdbroundup", opt_nokgdbroundup); + +/* + * Finally, some KGDB code :-) + */ + +/* + * Weak aliases for breakpoint management, + * can be overriden by architectures when needed: + */ +int __weak kgdb_validate_break_address(unsigned long addr) +{ + char tmp_variable[BREAK_INSTR_SIZE]; + + return probe_kernel_read(tmp_variable, (char *)addr, BREAK_INSTR_SIZE); +} + +int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +{ + int err; + + err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE); + if (err) + return err; + + return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); +} + +int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +{ + return probe_kernel_write((char *)addr, + (char *)bundle, BREAK_INSTR_SIZE); +} + +unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +int __weak kgdb_arch_init(void) +{ + return 0; +} + +int __weak kgdb_skipexception(int exception, struct pt_regs *regs) +{ + return 0; +} + +void __weak +kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) +{ + return; +} + +/** + * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. + * @regs: Current &struct pt_regs. + * + * This function will be called if the particular architecture must + * disable hardware debugging while it is processing gdb packets or + * handling exception. + */ +void __weak kgdb_disable_hw_debug(struct pt_regs *regs) +{ +} + +/* + * GDB remote protocol parser: + */ + +static const char hexchars[] = "0123456789abcdef"; + +static int hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + if ((ch >= 'A') && (ch <= 'F')) + return ch - 'A' + 10; + return -1; +} + +/* scan for the sequence $<data>#<checksum> */ +static void get_packet(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int count; + char ch; + + do { + /* + * Spin and wait around for the start character, ignore all + * other characters: + */ + while ((ch = (kgdb_io_ops->read_char())) != '$') + /* nothing */; + + kgdb_connected = 1; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* + * now, read until a # or end of buffer is found: + */ + while (count < (BUFMAX - 1)) { + ch = kgdb_io_ops->read_char(); + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 0; + + if (ch == '#') { + xmitcsum = hex(kgdb_io_ops->read_char()) << 4; + xmitcsum += hex(kgdb_io_ops->read_char()); + + if (checksum != xmitcsum) + /* failed checksum */ + kgdb_io_ops->write_char('-'); + else + /* successful transfer */ + kgdb_io_ops->write_char('+'); + if (kgdb_io_ops->flush) + kgdb_io_ops->flush(); + } + } while (checksum != xmitcsum); +} + +/* + * Send the packet in buffer. + * Check for gdb connection if asked for. + */ +static void put_packet(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* + * $<packet info>#<checksum>. + */ + while (1) { + kgdb_io_ops->write_char('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + kgdb_io_ops->write_char(ch); + checksum += ch; + count++; + } + + kgdb_io_ops->write_char('#'); + kgdb_io_ops->write_char(hexchars[checksum >> 4]); + kgdb_io_ops->write_char(hexchars[checksum & 0xf]); + if (kgdb_io_ops->flush) + kgdb_io_ops->flush(); + + /* Now see what we get in reply. */ + ch = kgdb_io_ops->read_char(); + + if (ch == 3) + ch = kgdb_io_ops->read_char(); + + /* If we get an ACK, we are done. */ + if (ch == '+') + return; + + /* + * If we get the start of another packet, this means + * that GDB is attempting to reconnect. We will NAK + * the packet being sent, and stop trying to send this + * packet. + */ + if (ch == '$') { + kgdb_io_ops->write_char('-'); + if (kgdb_io_ops->flush) + kgdb_io_ops->flush(); + return; + } + } +} + +static char *pack_hex_byte(char *pkt, u8 byte) +{ + *pkt++ = hexchars[byte >> 4]; + *pkt++ = hexchars[byte & 0xf]; + + return pkt; +} + +/* + * Convert the memory pointed to by mem into hex, placing result in buf. + * Return a pointer to the last char put in buf (null). May return an error. + */ +int kgdb_mem2hex(char *mem, char *buf, int count) +{ + char *tmp; + int err; + + /* + * We use the upper half of buf as an intermediate buffer for the + * raw memory copy. Hex conversion will work against this one. + */ + tmp = buf + count; + + err = probe_kernel_read(tmp, mem, count); + if (!err) { + while (count > 0) { + buf = pack_hex_byte(buf, *tmp); + tmp++; + count--; + } + + *buf = 0; + } + + return err; +} + +/* + * Copy the binary array pointed to by buf into mem. Fix $, #, and + * 0x7d escaped with 0x7d. Return a pointer to the character after + * the last byte written. + */ +static int kgdb_ebin2mem(char *buf, char *mem, int count) +{ + int err = 0; + char c; + + while (count-- > 0) { + c = *buf++; + if (c == 0x7d) + c = *buf++ ^ 0x20; + + err = probe_kernel_write(mem, &c, 1); + if (err) + break; + + mem++; + } + + return err; +} + +/* + * Convert the hex array pointed to by buf into binary to be placed in mem. + * Return a pointer to the character AFTER the last byte written. + * May return an error. + */ +int kgdb_hex2mem(char *buf, char *mem, int count) +{ + char *tmp_raw; + char *tmp_hex; + + /* + * We use the upper half of buf as an intermediate buffer for the + * raw memory that is converted from hex. + */ + tmp_raw = buf + count * 2; + + tmp_hex = tmp_raw - 1; + while (tmp_hex >= buf) { + tmp_raw--; + *tmp_raw = hex(*tmp_hex--); + *tmp_raw |= hex(*tmp_hex--) << 4; + } + + return probe_kernel_write(mem, tmp_raw, count); +} + +/* + * While we find nice hex chars, build a long_val. + * Return number of chars processed. + */ +int kgdb_hex2long(char **ptr, long *long_val) +{ + int hex_val; + int num = 0; + + *long_val = 0; + + while (**ptr) { + hex_val = hex(**ptr); + if (hex_val < 0) + break; + + *long_val = (*long_val << 4) | hex_val; + num++; + (*ptr)++; + } + + return num; +} + +/* Write memory due to an 'M' or 'X' packet. */ +static int write_mem_msg(int binary) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long addr; + unsigned long length; + int err; + + if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' && + kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') { + if (binary) + err = kgdb_ebin2mem(ptr, (char *)addr, length); + else + err = kgdb_hex2mem(ptr, (char *)addr, length); + if (err) + return err; + if (CACHE_FLUSH_IS_SAFE) + flush_icache_range(addr, addr + length + 1); + return 0; + } + + return -EINVAL; +} + +static void error_packet(char *pkt, int error) +{ + error = -error; + pkt[0] = 'E'; + pkt[1] = hexchars[(error / 10)]; + pkt[2] = hexchars[(error % 10)]; + pkt[3] = '\0'; +} + +/* + * Thread ID accessors. We represent a flat TID space to GDB, where + * the per CPU idle threads (which under Linux all have PID 0) are + * remapped to negative TIDs. + */ + +#define BUF_THREAD_ID_SIZE 16 + +static char *pack_threadid(char *pkt, unsigned char *id) +{ + char *limit; + + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *id++); + + return pkt; +} + +static void int_to_threadref(unsigned char *id, int value) +{ + unsigned char *scan; + int i = 4; + + scan = (unsigned char *)id; + while (i--) + *scan++ = 0; + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} + +static struct task_struct *getthread(struct pt_regs *regs, int tid) +{ + /* + * Non-positive TIDs are remapped idle tasks: + */ + if (tid <= 0) + return idle_task(-tid); + + /* + * find_task_by_pid_ns() does not take the tasklist lock anymore + * but is nicely RCU locked - hence is a pretty resilient + * thing to use: + */ + return find_task_by_pid_ns(tid, &init_pid_ns); +} + +/* + * CPU debug state control: + */ + +#ifdef CONFIG_SMP +static void kgdb_wait(struct pt_regs *regs) +{ + unsigned long flags; + int cpu; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + kgdb_info[cpu].debuggerinfo = regs; + kgdb_info[cpu].task = current; + /* + * Make sure the above info reaches the primary CPU before + * our cpu_in_kgdb[] flag setting does: + */ + smp_wmb(); + atomic_set(&cpu_in_kgdb[cpu], 1); + + /* Wait till primary CPU is done with debugging */ + while (atomic_read(&passive_cpu_wait[cpu])) + cpu_relax(); + + kgdb_info[cpu].debuggerinfo = NULL; + kgdb_info[cpu].task = NULL; + + /* fix up hardware debug registers on local cpu */ + if (arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + + /* Signal the primary CPU that we are done: */ + atomic_set(&cpu_in_kgdb[cpu], 0); + clocksource_touch_watchdog(); + local_irq_restore(flags); +} +#endif + +/* + * Some architectures need cache flushes when we set/clear a + * breakpoint: + */ +static void kgdb_flush_swbreak_addr(unsigned long addr) +{ + if (!CACHE_FLUSH_IS_SAFE) + return; + + if (current->mm && current->mm->mmap_cache) { + flush_cache_range(current->mm->mmap_cache, + addr, addr + BREAK_INSTR_SIZE); + } + /* Force flush instruction cache if it was outside the mm */ + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); +} + +/* + * SW breakpoint management: + */ +static int kgdb_activate_sw_breakpoints(void) +{ + unsigned long addr; + int error = 0; + int i; + + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != BP_SET) + continue; + + addr = kgdb_break[i].bpt_addr; + error = kgdb_arch_set_breakpoint(addr, + kgdb_break[i].saved_instr); + if (error) + return error; + + kgdb_flush_swbreak_addr(addr); + kgdb_break[i].state = BP_ACTIVE; + } + return 0; +} + +static int kgdb_set_sw_break(unsigned long addr) +{ + int err = kgdb_validate_break_address(addr); + int breakno = -1; + int i; + + if (err) + return err; + + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == BP_SET) && + (kgdb_break[i].bpt_addr == addr)) + return -EEXIST; + } + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state == BP_REMOVED && + kgdb_break[i].bpt_addr == addr) { + breakno = i; + break; + } + } + + if (breakno == -1) { + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state == BP_UNDEFINED) { + breakno = i; + break; + } + } + } + + if (breakno == -1) + return -E2BIG; + + kgdb_break[breakno].state = BP_SET; + kgdb_break[breakno].type = BP_BREAKPOINT; + kgdb_break[breakno].bpt_addr = addr; + + return 0; +} + +static int kgdb_deactivate_sw_breakpoints(void) +{ + unsigned long addr; + int error = 0; + int i; + + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != BP_ACTIVE) + continue; + addr = kgdb_break[i].bpt_addr; + error = kgdb_arch_remove_breakpoint(addr, + kgdb_break[i].saved_instr); + if (error) + return error; + + kgdb_flush_swbreak_addr(addr); + kgdb_break[i].state = BP_SET; + } + return 0; +} + +static int kgdb_remove_sw_break(unsigned long addr) +{ + int i; + + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == BP_SET) && + (kgdb_break[i].bpt_addr == addr)) { + kgdb_break[i].state = BP_REMOVED; + return 0; + } + } + return -ENOENT; +} + +int kgdb_isremovedbreak(unsigned long addr) +{ + int i; + + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if ((kgdb_break[i].state == BP_REMOVED) && + (kgdb_break[i].bpt_addr == addr)) + return 1; + } + return 0; +} + +int remove_all_break(void) +{ + unsigned long addr; + int error; + int i; + + /* Clear memory breakpoints. */ + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (kgdb_break[i].state != BP_ACTIVE) + goto setundefined; + addr = kgdb_break[i].bpt_addr; + error = kgdb_arch_remove_breakpoint(addr, + kgdb_break[i].saved_instr); + if (error) + printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", + addr); +setundefined: + kgdb_break[i].state = BP_UNDEFINED; + } + + /* Clear hardware breakpoints. */ + if (arch_kgdb_ops.remove_all_hw_break) + arch_kgdb_ops.remove_all_hw_break(); + + return 0; +} + +/* + * Remap normal tasks to their real PID, idle tasks to -1 ... -NR_CPUs: + */ +static inline int shadow_pid(int realpid) +{ + if (realpid) + return realpid; + + return -1-raw_smp_processor_id(); +} + +static char gdbmsgbuf[BUFMAX + 1]; + +static void kgdb_msg_write(const char *s, int len) +{ + char *bufptr; + int wcount; + int i; + + /* 'O'utput */ + gdbmsgbuf[0] = 'O'; + + /* Fill and send buffers... */ + while (len > 0) { + bufptr = gdbmsgbuf + 1; + + /* Calculate how many this time */ + if ((len << 1) > (BUFMAX - 2)) + wcount = (BUFMAX - 2) >> 1; + else + wcount = len; + + /* Pack in hex chars */ + for (i = 0; i < wcount; i++) + bufptr = pack_hex_byte(bufptr, s[i]); + *bufptr = '\0'; + + /* Move up */ + s += wcount; + len -= wcount; + + /* Write packet */ + put_packet(gdbmsgbuf); + } +} + +/* + * Return true if there is a valid kgdb I/O module. Also if no + * debugger is attached a message can be printed to the console about + * waiting for the debugger to attach. + * + * The print_wait argument is only to be true when called from inside + * the core kgdb_handle_exception, because it will wait for the + * debugger to attach. + */ +static int kgdb_io_ready(int print_wait) +{ + if (!kgdb_io_ops) + return 0; + if (kgdb_connected) + return 1; + if (atomic_read(&kgdb_setting_breakpoint)) + return 1; + if (print_wait) + printk(KERN_CRIT "KGDB: Waiting for remote debugger\n"); + return 1; +} + +/* + * All the functions that start with gdb_cmd are the various + * operations to implement the handlers for the gdbserial protocol + * where KGDB is communicating with an external debugger + */ + +/* Handle the '?' status packets */ +static void gdb_cmd_status(struct kgdb_state *ks) +{ + /* + * We know that this packet is only sent + * during initial connect. So to be safe, + * we clear out our breakpoints now in case + * GDB is reconnecting. + */ + remove_all_break(); + + remcom_out_buffer[0] = 'S'; + pack_hex_byte(&remcom_out_buffer[1], ks->signo); +} + +/* Handle the 'g' get registers request */ +static void gdb_cmd_getregs(struct kgdb_state *ks) +{ + struct task_struct *thread; + void *local_debuggerinfo; + int i; + + thread = kgdb_usethread; + if (!thread) { + thread = kgdb_info[ks->cpu].task; + local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo; + } else { + local_debuggerinfo = NULL; + for (i = 0; i < NR_CPUS; i++) { + /* + * Try to find the task on some other + * or possibly this node if we do not + * find the matching task then we try + * to approximate the results. + */ + if (thread == kgdb_info[i].task) + local_debuggerinfo = kgdb_info[i].debuggerinfo; + } + } + + /* + * All threads that don't have debuggerinfo should be + * in __schedule() sleeping, since all other CPUs + * are in kgdb_wait, and thus have debuggerinfo. + */ + if (local_debuggerinfo) { + pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo); + } else { + /* + * Pull stuff saved during switch_to; nothing + * else is accessible (or even particularly + * relevant). + * + * This should be enough for a stack trace. + */ + sleeping_thread_to_gdb_regs(gdb_regs, thread); + } + kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES); +} + +/* Handle the 'G' set registers request */ +static void gdb_cmd_setregs(struct kgdb_state *ks) +{ + kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES); + + if (kgdb_usethread && kgdb_usethread != current) { + error_packet(remcom_out_buffer, -EINVAL); + } else { + gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs); + strcpy(remcom_out_buffer, "OK"); + } +} + +/* Handle the 'm' memory read bytes */ +static void gdb_cmd_memread(struct kgdb_state *ks) +{ + char *ptr = &remcom_in_buffer[1]; + unsigned long length; + unsigned long addr; + int err; + + if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' && + kgdb_hex2long(&ptr, &length) > 0) { + err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length); + if (err) + error_packet(remcom_out_buffer, err); + } else { + error_packet(remcom_out_buffer, -EINVAL); + } +} + +/* Handle the 'M' memory write bytes */ +static void gdb_cmd_memwrite(struct kgdb_state *ks) +{ + int err = write_mem_msg(0); + + if (err) + error_packet(remcom_out_buffer, err); + else + strcpy(remcom_out_buffer, "OK"); +} + +/* Handle the 'X' memory binary write bytes */ +static void gdb_cmd_binwrite(struct kgdb_state *ks) +{ + int err = write_mem_msg(1); + + if (err) + error_packet(remcom_out_buffer, err); + else + strcpy(remcom_out_buffer, "OK"); +} + +/* Handle the 'D' or 'k', detach or kill packets */ +static void gdb_cmd_detachkill(struct kgdb_state *ks) +{ + int error; + + /* The detach case */ + if (remcom_in_buffer[0] == 'D') { + error = remove_all_break(); + if (error < 0) { + error_packet(remcom_out_buffer, error); + } else { + strcpy(remcom_out_buffer, "OK"); + kgdb_connected = 0; + } + put_packet(remcom_out_buffer); + } else { + /* + * Assume the kill case, with no exit code checking, + * trying to force detach the debugger: + */ + remove_all_break(); + kgdb_connected = 0; + } +} + +/* Handle the 'R' reboot packets */ +static int gdb_cmd_reboot(struct kgdb_state *ks) +{ + /* For now, only honor R0 */ + if (strcmp(remcom_in_buffer, "R0") == 0) { + printk(KERN_CRIT "Executing emergency reboot\n"); + strcpy(remcom_out_buffer, "OK"); + put_packet(remcom_out_buffer); + + /* + * Execution should not return from + * machine_emergency_restart() + */ + machine_emergency_restart(); + kgdb_connected = 0; + + return 1; + } + return 0; +} + +/* Handle the 'q' query packets */ +static void gdb_cmd_query(struct kgdb_state *ks) +{ + struct task_struct *thread; + unsigned char thref[8]; + char *ptr; + int i; + + switch (remcom_in_buffer[1]) { + case 's': + case 'f': + if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + + if (remcom_in_buffer[1] == 'f') + ks->threadid = 1; + + remcom_out_buffer[0] = 'm'; + ptr = remcom_out_buffer + 1; + + for (i = 0; i < 17; ks->threadid++) { + thread = getthread(ks->linux_regs, ks->threadid); + if (thread) { + int_to_threadref(thref, ks->threadid); + pack_threadid(ptr, thref); + ptr += BUF_THREAD_ID_SIZE; + *(ptr++) = ','; + i++; + } + } + *(--ptr) = '\0'; + break; + + case 'C': + /* Current thread id */ + strcpy(remcom_out_buffer, "QC"); + ks->threadid = shadow_pid(current->pid); + int_to_threadref(thref, ks->threadid); + pack_threadid(remcom_out_buffer + 2, thref); + break; + case 'T': + if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + ks->threadid = 0; + ptr = remcom_in_buffer + 17; + kgdb_hex2long(&ptr, &ks->threadid); + if (!getthread(ks->linux_regs, ks->threadid)) { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + if (ks->threadid > 0) { + kgdb_mem2hex(getthread(ks->linux_regs, + ks->threadid)->comm, + remcom_out_buffer, 16); + } else { + static char tmpstr[23 + BUF_THREAD_ID_SIZE]; + + sprintf(tmpstr, "Shadow task %d for pid 0", + (int)(-ks->threadid-1)); + kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr)); + } + break; + } +} + +/* Handle the 'H' task query packets */ +static void gdb_cmd_task(struct kgdb_state *ks) +{ + struct task_struct *thread; + char *ptr; + + switch (remcom_in_buffer[1]) { + case 'g': + ptr = &remcom_in_buffer[2]; + kgdb_hex2long(&ptr, &ks->threadid); + thread = getthread(ks->linux_regs, ks->threadid); + if (!thread && ks->threadid > 0) { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + kgdb_usethread = thread; + ks->kgdb_usethreadid = ks->threadid; + strcpy(remcom_out_buffer, "OK"); + break; + case 'c': + ptr = &remcom_in_buffer[2]; + kgdb_hex2long(&ptr, &ks->threadid); + if (!ks->threadid) { + kgdb_contthread = NULL; + } else { + thread = getthread(ks->linux_regs, ks->threadid); + if (!thread && ks->threadid > 0) { + error_packet(remcom_out_buffer, -EINVAL); + break; + } + kgdb_contthread = thread; + } + strcpy(remcom_out_buffer, "OK"); + break; + } +} + +/* Handle the 'T' thread query packets */ +static void gdb_cmd_thread(struct kgdb_state *ks) +{ + char *ptr = &remcom_in_buffer[1]; + struct task_struct *thread; + + kgdb_hex2long(&ptr, &ks->threadid); + thread = getthread(ks->linux_regs, ks->threadid); + if (thread) + strcpy(remcom_out_buffer, "OK"); + else + error_packet(remcom_out_buffer, -EINVAL); +} + +/* Handle the 'z' or 'Z' breakpoint remove or set packets */ +static void gdb_cmd_break(struct kgdb_state *ks) +{ + /* + * Since GDB-5.3, it's been drafted that '0' is a software + * breakpoint, '1' is a hardware breakpoint, so let's do that. + */ + char *bpt_type = &remcom_in_buffer[1]; + char *ptr = &remcom_in_buffer[2]; + unsigned long addr; + unsigned long length; + int error = 0; + + if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') { + /* Unsupported */ + if (*bpt_type > '4') + return; + } else { + if (*bpt_type != '0' && *bpt_type != '1') + /* Unsupported. */ + return; + } + + /* + * Test if this is a hardware breakpoint, and + * if we support it: + */ + if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT)) + /* Unsupported. */ + return; + + if (*(ptr++) != ',') { + error_packet(remcom_out_buffer, -EINVAL); + return; + } + if (!kgdb_hex2long(&ptr, &addr)) { + error_packet(remcom_out_buffer, -EINVAL); + return; + } + if (*(ptr++) != ',' || + !kgdb_hex2long(&ptr, &length)) { + error_packet(remcom_out_buffer, -EINVAL); + return; + } + + if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0') + error = kgdb_set_sw_break(addr); + else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0') + error = kgdb_remove_sw_break(addr); + else if (remcom_in_buffer[0] == 'Z') + error = arch_kgdb_ops.set_hw_breakpoint(addr, + (int)length, *bpt_type - '0'); + else if (remcom_in_buffer[0] == 'z') + error = arch_kgdb_ops.remove_hw_breakpoint(addr, + (int) length, *bpt_type - '0'); + + if (error == 0) + strcpy(remcom_out_buffer, "OK"); + else + error_packet(remcom_out_buffer, error); +} + +/* Handle the 'C' signal / exception passing packets */ +static int gdb_cmd_exception_pass(struct kgdb_state *ks) +{ + /* C09 == pass exception + * C15 == detach kgdb, pass exception + */ + if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') { + + ks->pass_exception = 1; + remcom_in_buffer[0] = 'c'; + + } else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') { + + ks->pass_exception = 1; + remcom_in_buffer[0] = 'D'; + remove_all_break(); + kgdb_connected = 0; + return 1; + + } else { + error_packet(remcom_out_buffer, -EINVAL); + return 0; + } + + /* Indicate fall through */ + return -1; +} + +/* + * This function performs all gdbserial command procesing + */ +static int gdb_serial_stub(struct kgdb_state *ks) +{ + int error = 0; + int tmp; + + /* Clear the out buffer. */ + memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); + + if (kgdb_connected) { + unsigned char thref[8]; + char *ptr; + + /* Reply to host that an exception has occurred */ + ptr = remcom_out_buffer; + *ptr++ = 'T'; + ptr = pack_hex_byte(ptr, ks->signo); + ptr += strlen(strcpy(ptr, "thread:")); + int_to_threadref(thref, shadow_pid(current->pid)); + ptr = pack_threadid(ptr, thref); + *ptr++ = ';'; + put_packet(remcom_out_buffer); + } + + kgdb_usethread = kgdb_info[ks->cpu].task; + ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid); + ks->pass_exception = 0; + + while (1) { + error = 0; + + /* Clear the out buffer. */ + memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); + + get_packet(remcom_in_buffer); + + switch (remcom_in_buffer[0]) { + case '?': /* gdbserial status */ + gdb_cmd_status(ks); + break; + case 'g': /* return the value of the CPU registers */ + gdb_cmd_getregs(ks); + break; + case 'G': /* set the value of the CPU registers - return OK */ + gdb_cmd_setregs(ks); + break; + case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + gdb_cmd_memread(ks); + break; + case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */ + gdb_cmd_memwrite(ks); + break; + case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */ + gdb_cmd_binwrite(ks); + break; + /* kill or detach. KGDB should treat this like a + * continue. + */ + case 'D': /* Debugger detach */ + case 'k': /* Debugger detach via kill */ + gdb_cmd_detachkill(ks); + goto default_handle; + case 'R': /* Reboot */ + if (gdb_cmd_reboot(ks)) + goto default_handle; + break; + case 'q': /* query command */ + gdb_cmd_query(ks); + break; + case 'H': /* task related */ + gdb_cmd_task(ks); + break; + case 'T': /* Query thread status */ + gdb_cmd_thread(ks); + break; + case 'z': /* Break point remove */ + case 'Z': /* Break point set */ + gdb_cmd_break(ks); + break; + case 'C': /* Exception passing */ + tmp = gdb_cmd_exception_pass(ks); + if (tmp > 0) + goto default_handle; + if (tmp == 0) + break; + /* Fall through on tmp < 0 */ + case 'c': /* Continue packet */ + case 's': /* Single step packet */ + if (kgdb_contthread && kgdb_contthread != current) { + /* Can't switch threads in kgdb */ + error_packet(remcom_out_buffer, -EINVAL); + break; + } + kgdb_activate_sw_breakpoints(); + /* Fall through to default processing */ + default: +default_handle: + error = kgdb_arch_handle_exception(ks->ex_vector, + ks->signo, + ks->err_code, + remcom_in_buffer, + remcom_out_buffer, + ks->linux_regs); + /* + * Leave cmd processing on error, detach, + * kill, continue, or single step. + */ + if (error >= 0 || remcom_in_buffer[0] == 'D' || + remcom_in_buffer[0] == 'k') { + error = 0; + goto kgdb_exit; + } + + } + + /* reply to the request */ + put_packet(remcom_out_buffer); + } + +kgdb_exit: + if (ks->pass_exception) + error = 1; + return error; +} + +static int kgdb_reenter_check(struct kgdb_state *ks) +{ + unsigned long addr; + + if (atomic_read(&kgdb_active) != raw_smp_processor_id()) + return 0; + + /* Panic on recursive debugger calls: */ + exception_level++; + addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs); + kgdb_deactivate_sw_breakpoints(); + + /* + * If the break point removed ok at the place exception + * occurred, try to recover and print a warning to the end + * user because the user planted a breakpoint in a place that + * KGDB needs in order to function. + */ + if (kgdb_remove_sw_break(addr) == 0) { + exception_level = 0; + kgdb_skipexception(ks->ex_vector, ks->linux_regs); + kgdb_activate_sw_breakpoints(); + printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n", + addr); + WARN_ON_ONCE(1); + + return 1; + } + remove_all_break(); + kgdb_skipexception(ks->ex_vector, ks->linux_regs); + + if (exception_level > 1) { + dump_stack(); + panic("Recursive entry to debugger"); + } + + printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n"); + dump_stack(); + panic("Recursive entry to debugger"); + + return 1; +} + +/* + * kgdb_handle_exception() - main entry point from a kernel exception + * + * Locking hierarchy: + * interface locks, if any (begin_session) + * kgdb lock (kgdb_active) + */ +int +kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +{ + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + unsigned long flags; + int error = 0; + int i, cpu; + + ks->cpu = raw_smp_processor_id(); + ks->ex_vector = evector; + ks->signo = signo; + ks->ex_vector = evector; + ks->err_code = ecode; + ks->kgdb_usethreadid = 0; + ks->linux_regs = regs; + + if (kgdb_reenter_check(ks)) + return 0; /* Ouch, double exception ! */ + +acquirelock: + /* + * Interrupts will be restored by the 'trap return' code, except when + * single stepping. + */ + local_irq_save(flags); + + cpu = raw_smp_processor_id(); + + /* + * Acquire the kgdb_active lock: + */ + while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1) + cpu_relax(); + + /* + * Do not start the debugger connection on this CPU if the last + * instance of the exception handler wanted to come into the + * debugger on a different CPU via a single step + */ + if (atomic_read(&kgdb_cpu_doing_single_step) != -1 && + atomic_read(&kgdb_cpu_doing_single_step) != cpu) { + + atomic_set(&kgdb_active, -1); + clocksource_touch_watchdog(); + local_irq_restore(flags); + + goto acquirelock; + } + + if (!kgdb_io_ready(1)) { + error = 1; + goto kgdb_restore; /* No I/O connection, so resume the system */ + } + + /* + * Don't enter if we have hit a removed breakpoint. + */ + if (kgdb_skipexception(ks->ex_vector, ks->linux_regs)) + goto kgdb_restore; + + /* Call the I/O driver's pre_exception routine */ + if (kgdb_io_ops->pre_exception) + kgdb_io_ops->pre_exception(); + + kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs; + kgdb_info[ks->cpu].task = current; + + kgdb_disable_hw_debug(ks->linux_regs); + + /* + * Get the passive CPU lock which will hold all the non-primary + * CPU in a spin state while the debugger is active + */ + if (!kgdb_single_step || !kgdb_contthread) { + for (i = 0; i < NR_CPUS; i++) + atomic_set(&passive_cpu_wait[i], 1); + } + + /* + * spin_lock code is good enough as a barrier so we don't + * need one here: + */ + atomic_set(&cpu_in_kgdb[ks->cpu], 1); + +#ifdef CONFIG_SMP + /* Signal the other CPUs to enter kgdb_wait() */ + if ((!kgdb_single_step || !kgdb_contthread) && kgdb_do_roundup) + kgdb_roundup_cpus(flags); +#endif + + /* + * Wait for the other CPUs to be notified and be waiting for us: + */ + for_each_online_cpu(i) { + while (!atomic_read(&cpu_in_kgdb[i])) + cpu_relax(); + } + + /* + * At this point the primary processor is completely + * in the debugger and all secondary CPUs are quiescent + */ + kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code); + kgdb_deactivate_sw_breakpoints(); + kgdb_single_step = 0; + kgdb_contthread = NULL; + exception_level = 0; + + /* Talk to debugger with gdbserial protocol */ + error = gdb_serial_stub(ks); + + /* Call the I/O driver's post_exception routine */ + if (kgdb_io_ops->post_exception) + kgdb_io_ops->post_exception(); + + kgdb_info[ks->cpu].debuggerinfo = NULL; + kgdb_info[ks->cpu].task = NULL; + atomic_set(&cpu_in_kgdb[ks->cpu], 0); + + if (!kgdb_single_step || !kgdb_contthread) { + for (i = NR_CPUS-1; i >= 0; i--) + atomic_set(&passive_cpu_wait[i], 0); + /* + * Wait till all the CPUs have quit + * from the debugger. + */ + for_each_online_cpu(i) { + while (atomic_read(&cpu_in_kgdb[i])) + cpu_relax(); + } + } + +kgdb_restore: + /* Free kgdb_active */ + atomic_set(&kgdb_active, -1); + clocksource_touch_watchdog(); + local_irq_restore(flags); + + return error; +} + +int kgdb_nmicallback(int cpu, void *regs) +{ +#ifdef CONFIG_SMP + if (!atomic_read(&cpu_in_kgdb[cpu]) && + atomic_read(&kgdb_active) != cpu && + atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) { + kgdb_wait((struct pt_regs *)regs); + return 0; + } +#endif + return 1; +} + +void kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + unsigned long flags; + + /* If we're debugging, or KGDB has not connected, don't try + * and print. */ + if (!kgdb_connected || atomic_read(&kgdb_active) != -1) + return; + + local_irq_save(flags); + kgdb_msg_write(s, count); + local_irq_restore(flags); +} + +static struct console kgdbcons = { + .name = "kgdb", + .write = kgdb_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED, + .index = -1, +}; + +#ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_handle_gdb(int key, struct tty_struct *tty) +{ + if (!kgdb_io_ops) { + printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); + return; + } + if (!kgdb_connected) + printk(KERN_CRIT "Entering KGDB\n"); + + kgdb_breakpoint(); +} + +static struct sysrq_key_op sysrq_gdb_op = { + .handler = sysrq_handle_gdb, + .help_msg = "Gdb", + .action_msg = "GDB", +}; +#endif + +static void kgdb_register_callbacks(void) +{ + if (!kgdb_io_module_registered) { + kgdb_io_module_registered = 1; + kgdb_arch_init(); +#ifdef CONFIG_MAGIC_SYSRQ + register_sysrq_key('g', &sysrq_gdb_op); +#endif + if (kgdb_use_con && !kgdb_con_registered) { + register_console(&kgdbcons); + kgdb_con_registered = 1; + } + } +} + +static void kgdb_unregister_callbacks(void) +{ + /* + * When this routine is called KGDB should unregister from the + * panic handler and clean up, making sure it is not handling any + * break exceptions at the time. + */ + if (kgdb_io_module_registered) { + kgdb_io_module_registered = 0; + kgdb_arch_exit(); +#ifdef CONFIG_MAGIC_SYSRQ + unregister_sysrq_key('g', &sysrq_gdb_op); +#endif + if (kgdb_con_registered) { + unregister_console(&kgdbcons); + kgdb_con_registered = 0; + } + } +} + +static void kgdb_initial_breakpoint(void) +{ + kgdb_break_asap = 0; + + printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n"); + kgdb_breakpoint(); +} + +/** + * kgdb_register_io_module - register KGDB IO module + * @new_kgdb_io_ops: the io ops vector + * + * Register it with the KGDB core. + */ +int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops) +{ + int err; + + spin_lock(&kgdb_registration_lock); + + if (kgdb_io_ops) { + spin_unlock(&kgdb_registration_lock); + + printk(KERN_ERR "kgdb: Another I/O driver is already " + "registered with KGDB.\n"); + return -EBUSY; + } + + if (new_kgdb_io_ops->init) { + err = new_kgdb_io_ops->init(); + if (err) { + spin_unlock(&kgdb_registration_lock); + return err; + } + } + + kgdb_io_ops = new_kgdb_io_ops; + + spin_unlock(&kgdb_registration_lock); + + printk(KERN_INFO "kgdb: Registered I/O driver %s.\n", + new_kgdb_io_ops->name); + + /* Arm KGDB now. */ + kgdb_register_callbacks(); + + if (kgdb_break_asap) + kgdb_initial_breakpoint(); + + return 0; +} +EXPORT_SYMBOL_GPL(kgdb_register_io_module); + +/** + * kkgdb_unregister_io_module - unregister KGDB IO module + * @old_kgdb_io_ops: the io ops vector + * + * Unregister it with the KGDB core. + */ +void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops) +{ + BUG_ON(kgdb_connected); + + /* + * KGDB is no longer able to communicate out, so + * unregister our callbacks and reset state. + */ + kgdb_unregister_callbacks(); + + spin_lock(&kgdb_registration_lock); + + WARN_ON_ONCE(kgdb_io_ops != old_kgdb_io_ops); + kgdb_io_ops = NULL; + + spin_unlock(&kgdb_registration_lock); + + printk(KERN_INFO + "kgdb: Unregistered I/O driver %s, debugger disabled.\n", + old_kgdb_io_ops->name); +} +EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); + +/** + * kgdb_breakpoint - generate breakpoint exception + * + * This function will generate a breakpoint exception. It is used at the + * beginning of a program to sync up with a debugger and can be used + * otherwise as a quick means to stop program execution and "break" into + * the debugger. + */ +void kgdb_breakpoint(void) +{ + atomic_set(&kgdb_setting_breakpoint, 1); + wmb(); /* Sync point before breakpoint */ + arch_kgdb_breakpoint(); + wmb(); /* Sync point after breakpoint */ + atomic_set(&kgdb_setting_breakpoint, 0); +} +EXPORT_SYMBOL_GPL(kgdb_breakpoint); + +static int __init opt_kgdb_wait(char *str) +{ + kgdb_break_asap = 1; + + if (kgdb_io_module_registered) + kgdb_initial_breakpoint(); + + return 0; +} + +early_param("kgdbwait", opt_kgdb_wait); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 2eae91f954ca..ae5c6c147c4b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1087,45 +1087,45 @@ static void check_process_timers(struct task_struct *tsk, maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || cputime_lt(ptime, t->expires.cpu)) { - prof_expires = t->expires.cpu; + if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) { + prof_expires = tl->expires.cpu; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } ++timers; maxfire = 20; virt_expires = cputime_zero; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || cputime_lt(utime, t->expires.cpu)) { - virt_expires = t->expires.cpu; + if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) { + virt_expires = tl->expires.cpu; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } ++timers; maxfire = 20; sched_expires = 0; while (!list_empty(timers)) { - struct cpu_timer_list *t = list_first_entry(timers, + struct cpu_timer_list *tl = list_first_entry(timers, struct cpu_timer_list, entry); - if (!--maxfire || sum_sched_runtime < t->expires.sched) { - sched_expires = t->expires.sched; + if (!--maxfire || sum_sched_runtime < tl->expires.sched) { + sched_expires = tl->expires.sched; break; } - t->firing = 1; - list_move_tail(&t->entry, firing); + tl->firing = 1; + list_move_tail(&tl->entry, firing); } /* diff --git a/kernel/printk.c b/kernel/printk.c index c46a20a19a15..bdd4ea8c3f2b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -643,8 +643,21 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) { int retval = 0; - if (can_use_console(cpu)) - retval = !try_acquire_console_sem(); + if (!try_acquire_console_sem()) { + retval = 1; + + /* + * If we can't use the console, we need to release + * the console semaphore by hand to avoid flushing + * the buffer. We need to hold the console semaphore + * in order to do this test safely. + */ + if (!can_use_console(cpu)) { + console_locked = 0; + up(&console_sem); + retval = 0; + } + } printk_cpu = UINT_MAX; spin_unlock(&logbuf_lock); return retval; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 86a93376282c..0080968d3e4a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -510,10 +510,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) if (!initial) { /* sleeps upto a single latency don't count. */ - if (sched_feat(NEW_FAIR_SLEEPERS)) { - vruntime -= calc_delta_fair(sysctl_sched_latency, - &cfs_rq->load); - } + if (sched_feat(NEW_FAIR_SLEEPERS)) + vruntime -= sysctl_sched_latency; /* ensure we never gain time by being placed backwards. */ vruntime = max_vruntime(se->vruntime, vruntime); diff --git a/kernel/semaphore.c b/kernel/semaphore.c new file mode 100644 index 000000000000..5c2942e768cd --- /dev/null +++ b/kernel/semaphore.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2008 Intel Corporation + * Author: Matthew Wilcox <willy@linux.intel.com> + * + * Distributed under the terms of the GNU GPL, version 2 + * + * This file implements counting semaphores. + * A counting semaphore may be acquired 'n' times before sleeping. + * See mutex.c for single-acquisition sleeping locks which enforce + * rules which allow code to be debugged more easily. + */ + +/* + * Some notes on the implementation: + * + * The spinlock controls access to the other members of the semaphore. + * down_trylock() and up() can be called from interrupt context, so we + * have to disable interrupts when taking the lock. It turns out various + * parts of the kernel expect to be able to use down() on a semaphore in + * interrupt context when they know it will succeed, so we have to use + * irqsave variants for down(), down_interruptible() and down_killable() + * too. + * + * The ->count variable represents how many more tasks can acquire this + * semaphore. If it's zero, there may be tasks waiting on the wait_list. + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/spinlock.h> + +static noinline void __down(struct semaphore *sem); +static noinline int __down_interruptible(struct semaphore *sem); +static noinline int __down_killable(struct semaphore *sem); +static noinline int __down_timeout(struct semaphore *sem, long jiffies); +static noinline void __up(struct semaphore *sem); + +/** + * down - acquire the semaphore + * @sem: the semaphore to be acquired + * + * Acquires the semaphore. If no more tasks are allowed to acquire the + * semaphore, calling this function will put the task to sleep until the + * semaphore is released. + * + * Use of this function is deprecated, please use down_interruptible() or + * down_killable() instead. + */ +void down(struct semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + __down(sem); + spin_unlock_irqrestore(&sem->lock, flags); +} +EXPORT_SYMBOL(down); + +/** + * down_interruptible - acquire the semaphore unless interrupted + * @sem: the semaphore to be acquired + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the sleep is interrupted by a signal, this function will return -EINTR. + * If the semaphore is successfully acquired, this function returns 0. + */ +int down_interruptible(struct semaphore *sem) +{ + unsigned long flags; + int result = 0; + + spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_interruptible(sem); + spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_interruptible); + +/** + * down_killable - acquire the semaphore unless killed + * @sem: the semaphore to be acquired + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the sleep is interrupted by a fatal signal, this function will return + * -EINTR. If the semaphore is successfully acquired, this function returns + * 0. + */ +int down_killable(struct semaphore *sem) +{ + unsigned long flags; + int result = 0; + + spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_killable(sem); + spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_killable); + +/** + * down_trylock - try to acquire the semaphore, without waiting + * @sem: the semaphore to be acquired + * + * Try to acquire the semaphore atomically. Returns 0 if the mutex has + * been acquired successfully or 1 if it it cannot be acquired. + * + * NOTE: This return value is inverted from both spin_trylock and + * mutex_trylock! Be careful about this when converting code. + * + * Unlike mutex_trylock, this function can be used from interrupt context, + * and the semaphore can be released by any task or interrupt. + */ +int down_trylock(struct semaphore *sem) +{ + unsigned long flags; + int count; + + spin_lock_irqsave(&sem->lock, flags); + count = sem->count - 1; + if (likely(count >= 0)) + sem->count = count; + spin_unlock_irqrestore(&sem->lock, flags); + + return (count < 0); +} +EXPORT_SYMBOL(down_trylock); + +/** + * down_timeout - acquire the semaphore within a specified time + * @sem: the semaphore to be acquired + * @jiffies: how long to wait before failing + * + * Attempts to acquire the semaphore. If no more tasks are allowed to + * acquire the semaphore, calling this function will put the task to sleep. + * If the semaphore is not released within the specified number of jiffies, + * this function returns -ETIME. It returns 0 if the semaphore was acquired. + */ +int down_timeout(struct semaphore *sem, long jiffies) +{ + unsigned long flags; + int result = 0; + + spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_timeout(sem, jiffies); + spin_unlock_irqrestore(&sem->lock, flags); + + return result; +} +EXPORT_SYMBOL(down_timeout); + +/** + * up - release the semaphore + * @sem: the semaphore to release + * + * Release the semaphore. Unlike mutexes, up() may be called from any + * context and even by tasks which have never called down(). + */ +void up(struct semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->lock, flags); + if (likely(list_empty(&sem->wait_list))) + sem->count++; + else + __up(sem); + spin_unlock_irqrestore(&sem->lock, flags); +} +EXPORT_SYMBOL(up); + +/* Functions for the contended case */ + +struct semaphore_waiter { + struct list_head list; + struct task_struct *task; + int up; +}; + +/* + * Because this function is inlined, the 'state' parameter will be + * constant, and thus optimised away by the compiler. Likewise the + * 'timeout' parameter for the cases without timeouts. + */ +static inline int __sched __down_common(struct semaphore *sem, long state, + long timeout) +{ + struct task_struct *task = current; + struct semaphore_waiter waiter; + + list_add_tail(&waiter.list, &sem->wait_list); + waiter.task = task; + waiter.up = 0; + + for (;;) { + if (state == TASK_INTERRUPTIBLE && signal_pending(task)) + goto interrupted; + if (state == TASK_KILLABLE && fatal_signal_pending(task)) + goto interrupted; + if (timeout <= 0) + goto timed_out; + __set_task_state(task, state); + spin_unlock_irq(&sem->lock); + timeout = schedule_timeout(timeout); + spin_lock_irq(&sem->lock); + if (waiter.up) + return 0; + } + + timed_out: + list_del(&waiter.list); + return -ETIME; + + interrupted: + list_del(&waiter.list); + return -EINTR; +} + +static noinline void __sched __down(struct semaphore *sem) +{ + __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_interruptible(struct semaphore *sem) +{ + return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_killable(struct semaphore *sem) +{ + return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); +} + +static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) +{ + return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); +} + +static noinline void __sched __up(struct semaphore *sem) +{ + struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, + struct semaphore_waiter, list); + list_del(&waiter->list); + waiter->up = 1; + wake_up_process(waiter->task); +} diff --git a/kernel/signal.c b/kernel/signal.c index 6af1210092c3..cc8303cd093d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1757,6 +1757,45 @@ static int do_signal_stop(int signr) return 1; } +static int ptrace_signal(int signr, siginfo_t *info, + struct pt_regs *regs, void *cookie) +{ + if (!(current->ptrace & PT_PTRACED)) + return signr; + + ptrace_signal_deliver(regs, cookie); + + /* Let the debugger run. */ + ptrace_stop(signr, 0, info); + + /* We're back. Did the debugger cancel the sig? */ + signr = current->exit_code; + if (signr == 0) + return signr; + + current->exit_code = 0; + + /* Update the siginfo structure if the signal has + changed. If the debugger wanted something + specific in the siginfo structure then it should + have updated *info via PTRACE_SETSIGINFO. */ + if (signr != info->si_signo) { + info->si_signo = signr; + info->si_errno = 0; + info->si_code = SI_USER; + info->si_pid = task_pid_vnr(current->parent); + info->si_uid = current->parent->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + specific_send_sig_info(signr, info, current); + signr = 0; + } + + return signr; +} + int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie) { @@ -1785,36 +1824,10 @@ relock: if (!signr) break; /* will return 0 */ - if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { - ptrace_signal_deliver(regs, cookie); - - /* Let the debugger run. */ - ptrace_stop(signr, 0, info); - - /* We're back. Did the debugger cancel the sig? */ - signr = current->exit_code; - if (signr == 0) - continue; - - current->exit_code = 0; - - /* Update the siginfo structure if the signal has - changed. If the debugger wanted something - specific in the siginfo structure then it should - have updated *info via PTRACE_SETSIGINFO. */ - if (signr != info->si_signo) { - info->si_signo = signr; - info->si_errno = 0; - info->si_code = SI_USER; - info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->uid; - } - - /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(¤t->blocked, signr)) { - specific_send_sig_info(signr, info, current); + if (signr != SIGKILL) { + signr = ptrace_signal(signr, info, regs, cookie); + if (!signr) continue; - } } ka = ¤t->sighand->action[signr-1]; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7f60097d443a..73961f35fdc8 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -141,8 +141,16 @@ static void clocksource_watchdog(unsigned long data) } if (!list_empty(&watchdog_list)) { - __mod_timer(&watchdog_timer, - watchdog_timer.expires + WATCHDOG_INTERVAL); + /* + * Cycle through CPUs to check if the CPUs stay + * synchronized to each other. + */ + int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); + + if (next_cpu >= NR_CPUS) + next_cpu = first_cpu(cpu_online_map); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); } spin_unlock(&watchdog_lock); } @@ -164,7 +172,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) if (!started && watchdog) { watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, + first_cpu(cpu_online_map)); } } else { if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) @@ -185,7 +194,8 @@ static void clocksource_check_watchdog(struct clocksource *cs) watchdog_last = watchdog->read(); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; - add_timer(&watchdog_timer); + add_timer_on(&watchdog_timer, + first_cpu(cpu_online_map)); } } } @@ -222,6 +232,18 @@ void clocksource_resume(void) } /** + * clocksource_touch_watchdog - Update watchdog + * + * Update the watchdog after exception contexts such as kgdb so as not + * to incorrectly trip the watchdog. + * + */ +void clocksource_touch_watchdog(void) +{ + clocksource_resume_watchdog(); +} + +/** * clocksource_get_next - Returns the selected clocksource * */ diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index e1bd50cbbf5d..fdfa0c745bb6 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -14,7 +14,7 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> -#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 1bea399a9ef0..4f3886562b8c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -14,12 +14,14 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> -#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> #include <linux/tick.h> +#include <asm/irq_regs.h> + #include "tick-internal.h" /* diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 0258d3115d54..450c04935b66 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -14,7 +14,7 @@ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> -#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 686da821d376..69dba0c71727 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,9 +158,8 @@ void tick_nohz_stop_idle(int cpu) } } -static ktime_t tick_nohz_start_idle(int cpu) +static ktime_t tick_nohz_start_idle(struct tick_sched *ts) { - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t now, delta; now = ktime_get(); @@ -201,8 +200,8 @@ void tick_nohz_stop_sched_tick(void) local_irq_save(flags); cpu = smp_processor_id(); - now = tick_nohz_start_idle(cpu); ts = &per_cpu(tick_cpu_sched, cpu); + now = tick_nohz_start_idle(ts); /* * If this cpu is offline and it is the one which updates @@ -222,7 +221,6 @@ void tick_nohz_stop_sched_tick(void) if (need_resched()) goto end; - cpu = smp_processor_id(); if (unlikely(local_softirq_pending())) { static int ratelimit; diff --git a/kernel/timer.c b/kernel/timer.c index b024106daa70..f3d35d4ea42e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1228,13 +1228,6 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) return 0; } -/* - * lockdep: we want to track each per-CPU base as a separate lock-class, - * but timer-bases are kmalloc()-ed, so we need to attach separate - * keys to them: - */ -static struct lock_class_key base_lock_keys[NR_CPUS]; - static int __cpuinit init_timers_cpu(int cpu) { int j; @@ -1277,7 +1270,6 @@ static int __cpuinit init_timers_cpu(int cpu) } spin_lock_init(&base->lock); - lockdep_set_class(&base->lock, base_lock_keys + cpu); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); @@ -1316,8 +1308,8 @@ static void __cpuinit migrate_timers(int cpu) new_base = get_cpu_var(tvec_bases); local_irq_disable(); - double_spin_lock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_lock(&new_base->lock); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); BUG_ON(old_base->running_timer); @@ -1330,8 +1322,8 @@ static void __cpuinit migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } - double_spin_unlock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); local_irq_enable(); put_cpu_var(tvec_bases); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ff06611655af..00ff4d08e370 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -219,6 +219,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + timer_stats_timer_set_start_info(&dwork->timer); if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -580,6 +581,7 @@ EXPORT_SYMBOL(schedule_delayed_work); int schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { + timer_stats_timer_set_start_info(&dwork->timer); return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work_on); |