summaryrefslogtreecommitdiffstats
path: root/arch/x86/lguest
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lguest')
-rw-r--r--arch/x86/lguest/boot.c47
-rw-r--r--arch/x86/lguest/i386_head.S105
2 files changed, 130 insertions, 22 deletions
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9257510b4836..4996cf5f73a0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
}
/*
- * For a single GDT entry which changes, we do the lazy thing: alter our GDT,
- * then tell the Host to reload the entire thing. This operation is so rare
- * that this naive implementation is reasonable.
+ * For a single GDT entry which changes, we simply change our copy and
+ * then tell the host about it.
*/
static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
const void *desc, int type)
@@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
}
/*
- * OK, I lied. There are three "thread local storage" GDT entries which change
+ * There are three "thread local storage" GDT entries which change
* on every context switch (these three entries are how glibc implements
- * __thread variables). So we have a hypercall specifically for this case.
+ * __thread variables). As an optimization, we have a hypercall
+ * specifically for this case.
+ *
+ * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall
+ * which took a range of entries?
*/
static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
{
@@ -528,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
{
lguest_data.pgdir = cr3;
lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
- cr3_changed = true;
+
+ /* These two page tables are simple, linear, and used during boot */
+ if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+ cr3_changed = true;
}
static unsigned long lguest_read_cr3(void)
@@ -700,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
* to forget all of them. Fortunately, this is very rare.
*
* ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell
- * the Host anything changed until we've done the first page table switch,
- * which brings boot back to 0.25 seconds.
+ * which makes booting astonishingly slow: 48 seconds! So we don't even tell
+ * the Host anything changed until we've done the first real page table switch,
+ * which brings boot back to 4.3 seconds.
*/
static void lguest_set_pte(pte_t *ptep, pte_t pteval)
{
@@ -788,22 +794,22 @@ static void lguest_flush_tlb_kernel(void)
* simple as setting a bit. We don't actually "ack" interrupts as such, we
* just mask and unmask them. I wonder if we should be cleverer?
*/
-static void disable_lguest_irq(unsigned int irq)
+static void disable_lguest_irq(struct irq_data *data)
{
- set_bit(irq, lguest_data.blocked_interrupts);
+ set_bit(data->irq, lguest_data.blocked_interrupts);
}
-static void enable_lguest_irq(unsigned int irq)
+static void enable_lguest_irq(struct irq_data *data)
{
- clear_bit(irq, lguest_data.blocked_interrupts);
+ clear_bit(data->irq, lguest_data.blocked_interrupts);
}
/* This structure describes the lguest IRQ controller. */
static struct irq_chip lguest_irq_controller = {
.name = "lguest",
- .mask = disable_lguest_irq,
- .mask_ack = disable_lguest_irq,
- .unmask = enable_lguest_irq,
+ .irq_mask = disable_lguest_irq,
+ .irq_mask_ack = disable_lguest_irq,
+ .irq_unmask = enable_lguest_irq,
};
/*
@@ -835,12 +841,12 @@ static void __init lguest_init_IRQ(void)
* rather than set them in lguest_init_IRQ we are called here every time an
* lguest device needs an interrupt.
*
- * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should
+ * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
* pass that up!
*/
void lguest_setup_irq(unsigned int irq)
{
- irq_to_desc_alloc_node(irq, 0);
+ irq_alloc_desc_at(irq, 0);
set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
}
@@ -999,7 +1005,7 @@ static void lguest_time_init(void)
clockevents_register_device(&lguest_clockevent);
/* Finally, we unblock the timer interrupt. */
- enable_lguest_irq(0);
+ clear_bit(0, lguest_data.blocked_interrupts);
}
/*
@@ -1346,9 +1352,6 @@ __init void lguest_init(void)
*/
switch_to_new_gdt(0);
- /* We actually boot with all memory mapped, but let's say 128MB. */
- max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
-
/*
* The Host<->Guest Switcher lives at the top of our address space, and
* the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..e7d5382ef263 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
/*G:020
* Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
/* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp
+ call init_pagetables
+
/* Jumps are relative: we're running __PAGE_OFFSET too low. */
jmp lguest_init+__PAGE_OFFSET
+/*
+ * Initialize page tables. This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base. The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
+ * don't have a stack at this point, so we can't just use call and ret.
+ */
+init_pagetables:
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+#define pa(X) ((X) - __PAGE_OFFSET)
+
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = \
+ PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+#ifdef CONFIG_X86_PAE
+
+ /*
+ * In PAE mode initial_page_table is statically defined to contain
+ * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD entries
+ * to the first kernel PMD.
+ *
+ * Note the upper half of each PMD or PTE are always zero at this stage.
+ */
+
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
+
+ xorl %ebx,%ebx /* %ebx is kept at zero */
+
+ movl $pa(__brk_base), %edi
+ movl $pa(initial_pg_pmd), %edx
+ movl $PTE_IDENT_ATTR, %eax
+10:
+ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
+ movl %ecx,(%edx) /* Store PMD entry */
+ /* Upper half already zero */
+ addl $8,%edx
+ movl $512,%ecx
+11:
+ stosl
+ xchgl %eax,%ebx
+ stosl
+ xchgl %eax,%ebx
+ addl $0x1000,%eax
+ loop 11b
+
+ /*
+ * End condition: we must map up to the end + MAPPING_BEYOND_END.
+ */
+ movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+ cmpl %ebp,%eax
+ jb 10b
+1:
+ addl $__PAGE_OFFSET, %edi
+ movl %edi, pa(_brk_end)
+ shrl $12, %eax
+ movl %eax, pa(max_pfn_mapped)
+
+ /* Do early initialization of the fixmap area */
+ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else /* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+ movl $pa(__brk_base), %edi
+ movl $pa(initial_page_table), %edx
+ movl $PTE_IDENT_ATTR, %eax
+10:
+ leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
+ movl %ecx,(%edx) /* Store identity PDE entry */
+ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
+ addl $4,%edx
+ movl $1024, %ecx
+11:
+ stosl
+ addl $0x1000,%eax
+ loop 11b
+ /*
+ * End condition: we must map up to the end + MAPPING_BEYOND_END.
+ */
+ movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+ cmpl %ebp,%eax
+ jb 10b
+ addl $__PAGE_OFFSET, %edi
+ movl %edi, pa(_brk_end)
+ shrl $12, %eax
+ movl %eax, pa(max_pfn_mapped)
+
+ /* Do early initialization of the fixmap area */
+ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+ movl %eax,pa(initial_page_table+0xffc)
+#endif
+ ret
+
/*G:055
* We create a macro which puts the assembler code between lgstart_ and lgend_
* markers. These templates are put in the .text section: they can't be
OpenPOWER on IntegriCloud