diff options
author | Peter Zijlstra <peterz@infradead.org> | 2016-03-22 21:42:53 +0100 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-04-15 18:16:37 +0200 |
commit | 0227f7c42d9e01b00ea8cbd635aaf92a09b54abc (patch) | |
tree | 7b796ca59c6d703b857475cab4e07595cc2dd15c /arch/s390/mm/fault.c | |
parent | 5a3b7b112884f80ff19b18028fabeb4f9c035518 (diff) | |
download | talos-obmc-linux-0227f7c42d9e01b00ea8cbd635aaf92a09b54abc.tar.gz talos-obmc-linux-0227f7c42d9e01b00ea8cbd635aaf92a09b54abc.zip |
s390: Clarify pagefault interrupt
While looking at set_task_state() users I stumbled over the s390 pfault
interrupt code. Since Heiko provided a great explanation on how it
worked, I figured we ought to preserve this.
Also make a few little tweaks to the code to aid in readability and
explicitly comment the unusual blocking scheme.
Based-on-text-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm/fault.c')
-rw-r--r-- | arch/s390/mm/fault.c | 41 |
1 files changed, 33 insertions, 8 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index cce577feab1e..7a3144017301 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -631,6 +631,29 @@ void pfault_fini(void)
 static DEFINE_SPINLOCK(pfault_lock);
 static LIST_HEAD(pfault_list);
 
+#define PF_COMPLETE	0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happens within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule(). It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
 static void pfault_interrupt(struct ext_code ext_code,
 			     unsigned int param32, unsigned long param64)
 {
@@ -639,10 +662,9 @@ static void pfault_interrupt(struct ext_code ext_code,
 	pid_t pid;
 
 	/*
-	 * Get the external interruption subcode & pfault
-	 * initial/completion signal bit. VM stores this
-	 * in the 'cpu address' field associated with the
-	 * external interrupt.
+	 * Get the external interruption subcode & pfault initial/completion
+	 * signal bit. VM stores this in the 'cpu address' field associated
+	 * with the external interrupt.
 	 */
 	subcode = ext_code.subcode;
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
@@ -658,7 +680,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 	if (!tsk)
 		return;
 	spin_lock(&pfault_lock);
-	if (subcode & 0x0080) {
+	if (subcode & PF_COMPLETE) {
 		/* signal bit is set -> a page has been swapped in by VM */
 		if (tsk->thread.pfault_wait == 1) {
 			/* Initial interrupt was faster than the completion
@@ -687,8 +709,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 			goto out;
 		if (tsk->thread.pfault_wait == 1) {
 			/* Already on the list with a reference: put to sleep */
-			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-			set_tsk_need_resched(tsk);
+			goto block;
 		} else if (tsk->thread.pfault_wait == -1) {
 			/* Completion interrupt was faster than the initial
 			 * interrupt (pfault_wait == -1). Set pfault_wait
@@ -703,7 +724,11 @@ static void pfault_interrupt(struct ext_code ext_code,
 			get_task_struct(tsk);
 			tsk->thread.pfault_wait = 1;
 			list_add(&tsk->thread.list, &pfault_list);
-			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+block:
+			/* Since this must be a userspace fault, there
+			 * is no kernel task state to trample. Rely on the
+			 * return to userspace schedule() to block. */
+			__set_current_state(TASK_UNINTERRUPTIBLE);
 			set_tsk_need_resched(tsk);
 		}
 	}