Diffstat (limited to 'arch/x86/kernel')
 -rw-r--r--  arch/x86/kernel/cpu/common.c        |   3
 -rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c |   2
 -rw-r--r--  arch/x86/kernel/cpu/mshyperv.c      |  62
 -rw-r--r--  arch/x86/kernel/crash.c             | 334
 -rw-r--r--  arch/x86/kernel/irq.c               |   7
 -rw-r--r--  arch/x86/kernel/kexec-bzimage64.c   |  10
 -rw-r--r--  arch/x86/kernel/kvm.c               |  18
 -rw-r--r--  arch/x86/kernel/machine_kexec_64.c  | 111
 -rw-r--r--  arch/x86/kernel/pci-dma.c           |   1
 -rw-r--r--  arch/x86/kernel/process_64.c        |  14
 -rw-r--r--  arch/x86/kernel/signal_compat.c     |   4
 -rw-r--r--  arch/x86/kernel/x86_init.c          |   1
 12 files changed, 194 insertions(+), 373 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 348cf4821240..4702fbd98f92 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,7 +487,7 @@ void load_percpu_segment(int cpu) loadsegment(fs, __KERNEL_PERCPU); #else __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); #endif load_stack_canary_segment(); } @@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); /* * The following percpu variables are hot. Align current_task to diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 48179928ff38..0624957aa068 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -58,7 +58,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/early-microcode.txt + * format. See Documentation/x86/microcode.txt */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9340f41ce8d3..031082c96db8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -22,7 +22,7 @@ #include <linux/kexec.h> #include <asm/processor.h> #include <asm/hypervisor.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/desc.h> #include <asm/irq_regs.h> @@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); static void (*hv_crash_handler)(struct pt_regs *regs); @@ -69,6 +70,41 @@ void hv_remove_vmbus_irq(void) EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +/* + * Routines to do per-architecture handling of stimer0 + * interrupts when in Direct Mode + */ + +__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_irq(); + inc_irq_stat(hyperv_stimer0_count); + if (hv_stimer0_handler) + hv_stimer0_handler(); + ack_APIC_irq(); + + exiting_irq(); + set_irq_regs(old_regs); +} + +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) +{ + *vector = HYPERV_STIMER0_VECTOR; + *irq = 0; /* Unused on x86/x64 */ + hv_stimer0_handler = handler; + return 0; +} +EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); + +void hv_remove_stimer0_irq(int irq) +{ + /* We have no way to deallocate the interrupt gate */ + hv_stimer0_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); + void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; @@ -180,8 +216,8 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); - ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); - ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); + ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); + ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); @@ 
-189,11 +225,12 @@ static void __init ms_hyperv_init_platform(void) /* * Extract host information. */ - if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HVCPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION); - hv_host_info_edx = cpuid_edx(HVCPUID_VERSION); + if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >= + HYPERV_CPUID_VERSION) { + hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); + hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); + hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); + hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", hv_host_info_eax, hv_host_info_ebx >> 16, @@ -207,6 +244,11 @@ static void __init ms_hyperv_init_platform(void) x86_platform.calibrate_cpu = hv_get_tsc_khz; } + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { + ms_hyperv.nested_features = + cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); + } + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -257,6 +299,10 @@ static void __init ms_hyperv_init_platform(void) alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_vector); + /* Setup the IDT for stimer0 */ + if (ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE) + alloc_intr_gate(HYPERV_STIMER0_VECTOR, + hv_stimer0_callback_vector); #endif } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 1f6680427ff0..f631a3f15587 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -38,37 +38,6 @@ #include <asm/virtext.h> #include <asm/intel_pt.h> -/* Alignment required for elf header segment */ -#define ELF_CORE_HEADER_ALIGN 4096 - -/* This primarily represents number of split ranges due to exclusion */ -#define CRASH_MAX_RANGES 16 - -struct crash_mem_range { - u64 start, end; -}; - -struct crash_mem { - unsigned int nr_ranges; - struct crash_mem_range ranges[CRASH_MAX_RANGES]; -}; - -/* Misc data about ram ranges needed to prepare elf headers */ -struct crash_elf_data { - struct kimage *image; - /* - * Total number of ram ranges we have after various adjustments for - * crash reserved region, etc. 
- */ - unsigned int max_nr_ranges; - - /* Pointer to elf header */ - void *ehdr; - /* Pointer to next phdr */ - void *bufp; - struct crash_mem mem; -}; - /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { struct boot_params *params; @@ -218,124 +187,49 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } - /* Gather all the required information to prepare elf headers for ram regions */ -static void fill_up_crash_elf_data(struct crash_elf_data *ced, - struct kimage *image) +static struct crash_mem *fill_up_crash_elf_data(void) { unsigned int nr_ranges = 0; - - ced->image = image; + struct crash_mem *cmem; walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + if (!nr_ranges) + return NULL; - ced->max_nr_ranges = nr_ranges; - - /* Exclusion of crash region could split memory ranges */ - ced->max_nr_ranges++; - - /* If crashk_low_res is not 0, another range split possible */ - if (crashk_low_res.end) - ced->max_nr_ranges++; -} - -static int exclude_mem_range(struct crash_mem *mem, - unsigned long long mstart, unsigned long long mend) -{ - int i, j; - unsigned long long start, end; - struct crash_mem_range temp_range = {0, 0}; - - for (i = 0; i < mem->nr_ranges; i++) { - start = mem->ranges[i].start; - end = mem->ranges[i].end; - - if (mstart > end || mend < start) - continue; - - /* Truncate any area outside of range */ - if (mstart < start) - mstart = start; - if (mend > end) - mend = end; - - /* Found completely overlapping range */ - if (mstart == start && mend == end) { - mem->ranges[i].start = 0; - mem->ranges[i].end = 0; - if (i < mem->nr_ranges - 1) { - /* Shift rest of the ranges to left */ - for (j = i; j < mem->nr_ranges - 1; j++) { - mem->ranges[j].start = - mem->ranges[j+1].start; - mem->ranges[j].end = - mem->ranges[j+1].end; - } - } - mem->nr_ranges--; - return 0; - } - - if (mstart > start && mend < end) { - /* Split original range */ - mem->ranges[i].end = mstart - 1; - temp_range.start = mend + 1; - temp_range.end = end; - } else if (mstart != start) - mem->ranges[i].end = mstart - 1; - else - mem->ranges[i].start = mend + 1; - break; - } + /* + * Exclusion of crash region and/or crashk_low_res may cause + * another range split. So add extra two slots here. + */ + nr_ranges += 2; + cmem = vzalloc(sizeof(struct crash_mem) + + sizeof(struct crash_mem_range) * nr_ranges); + if (!cmem) + return NULL; - /* If a split happend, add the split to array */ - if (!temp_range.end) - return 0; + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; - /* Split happened */ - if (i == CRASH_MAX_RANGES - 1) { - pr_err("Too many crash ranges after split\n"); - return -ENOMEM; - } - - /* Location where new range should go */ - j = i + 1; - if (j < mem->nr_ranges) { - /* Move over all ranges one slot towards the end */ - for (i = mem->nr_ranges - 1; i >= j; i--) - mem->ranges[i + 1] = mem->ranges[i]; - } - - mem->ranges[j].start = temp_range.start; - mem->ranges[j].end = temp_range.end; - mem->nr_ranges++; - return 0; + return cmem; } /* * Look for any unwanted ranges between mstart, mend and remove them. 
This - * might lead to split and split ranges are put in ced->mem.ranges[] array + * might lead to split and split ranges are put in cmem->ranges[] array */ -static int elf_header_exclude_ranges(struct crash_elf_data *ced, - unsigned long long mstart, unsigned long long mend) +static int elf_header_exclude_ranges(struct crash_mem *cmem) { - struct crash_mem *cmem = &ced->mem; int ret = 0; - memset(cmem->ranges, 0, sizeof(cmem->ranges)); - - cmem->ranges[0].start = mstart; - cmem->ranges[0].end = mend; - cmem->nr_ranges = 1; - /* Exclude crashkernel region */ - ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); if (ret) return ret; if (crashk_low_res.end) { - ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, + crashk_low_res.end); if (ret) return ret; } @@ -345,144 +239,12 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { - struct crash_elf_data *ced = arg; - Elf64_Ehdr *ehdr; - Elf64_Phdr *phdr; - unsigned long mstart, mend; - struct kimage *image = ced->image; - struct crash_mem *cmem; - int ret, i; - - ehdr = ced->ehdr; - - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(ced, res->start, res->end); - if (ret) - return ret; - - /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */ - cmem = &ced->mem; - - for (i = 0; i < cmem->nr_ranges; i++) { - mstart = cmem->ranges[i].start; - mend = cmem->ranges[i].end; - - phdr = ced->bufp; - ced->bufp += sizeof(Elf64_Phdr); - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mstart; - - /* - * If a range matches backup region, adjust offset to backup - * segment. - */ - if (mstart == image->arch.backup_src_start && - (mend - mstart + 1) == image->arch.backup_src_sz) - phdr->p_offset = image->arch.backup_load_addr; - - phdr->p_paddr = mstart; - phdr->p_vaddr = (unsigned long long) __va(mstart); - phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; - phdr->p_align = 0; - ehdr->e_phnum++; - pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", - phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, - ehdr->e_phnum, phdr->p_offset); - } - - return ret; -} - -static int prepare_elf64_headers(struct crash_elf_data *ced, - void **addr, unsigned long *sz) -{ - Elf64_Ehdr *ehdr; - Elf64_Phdr *phdr; - unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; - unsigned char *buf, *bufp; - unsigned int cpu; - unsigned long long notes_addr; - int ret; + struct crash_mem *cmem = arg; - /* extra phdr for vmcoreinfo elf note */ - nr_phdr = nr_cpus + 1; - nr_phdr += ced->max_nr_ranges; - - /* - * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping - * area on x86_64 (ffffffff80000000 - ffffffffa0000000). - * I think this is required by tools like gdb. So same physical - * memory will be mapped in two elf headers. One will contain kernel - * text virtual addresses and other will have __va(physical) addresses. 
- */ + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; - nr_phdr++; - elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); - elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); - - buf = vzalloc(elf_sz); - if (!buf) - return -ENOMEM; - - bufp = buf; - ehdr = (Elf64_Ehdr *)bufp; - bufp += sizeof(Elf64_Ehdr); - memcpy(ehdr->e_ident, ELFMAG, SELFMAG); - ehdr->e_ident[EI_CLASS] = ELFCLASS64; - ehdr->e_ident[EI_DATA] = ELFDATA2LSB; - ehdr->e_ident[EI_VERSION] = EV_CURRENT; - ehdr->e_ident[EI_OSABI] = ELF_OSABI; - memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); - ehdr->e_type = ET_CORE; - ehdr->e_machine = ELF_ARCH; - ehdr->e_version = EV_CURRENT; - ehdr->e_phoff = sizeof(Elf64_Ehdr); - ehdr->e_ehsize = sizeof(Elf64_Ehdr); - ehdr->e_phentsize = sizeof(Elf64_Phdr); - - /* Prepare one phdr of type PT_NOTE for each present cpu */ - for_each_present_cpu(cpu) { - phdr = (Elf64_Phdr *)bufp; - bufp += sizeof(Elf64_Phdr); - phdr->p_type = PT_NOTE; - notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); - phdr->p_offset = phdr->p_paddr = notes_addr; - phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); - (ehdr->e_phnum)++; - } - - /* Prepare one PT_NOTE header for vmcoreinfo */ - phdr = (Elf64_Phdr *)bufp; - bufp += sizeof(Elf64_Phdr); - phdr->p_type = PT_NOTE; - phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); - phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; - (ehdr->e_phnum)++; - -#ifdef CONFIG_X86_64 - /* Prepare PT_LOAD type program header for kernel text region */ - phdr = (Elf64_Phdr *)bufp; - bufp += sizeof(Elf64_Phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_vaddr = (Elf64_Addr)_text; - phdr->p_filesz = phdr->p_memsz = _end - _text; - phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); - (ehdr->e_phnum)++; -#endif - - /* Prepare PT_LOAD headers for system ram chunks. */ - ced->ehdr = ehdr; - ced->bufp = bufp; - ret = walk_system_ram_res(0, -1, ced, - prepare_elf64_ram_headers_callback); - if (ret < 0) - return ret; - - *addr = buf; - *sz = elf_sz; return 0; } @@ -490,18 +252,46 @@ static int prepare_elf64_headers(struct crash_elf_data *ced, static int prepare_elf_headers(struct kimage *image, void **addr, unsigned long *sz) { - struct crash_elf_data *ced; - int ret; + struct crash_mem *cmem; + Elf64_Ehdr *ehdr; + Elf64_Phdr *phdr; + int ret, i; - ced = kzalloc(sizeof(*ced), GFP_KERNEL); - if (!ced) + cmem = fill_up_crash_elf_data(); + if (!cmem) return -ENOMEM; - fill_up_crash_elf_data(ced, image); + ret = walk_system_ram_res(0, -1, cmem, + prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude unwanted mem ranges */ + ret = elf_header_exclude_ranges(cmem); + if (ret) + goto out; /* By default prepare 64bit headers */ - ret = prepare_elf64_headers(ced, addr, sz); - kfree(ced); + ret = crash_prepare_elf64_headers(cmem, + IS_ENABLED(CONFIG_X86_64), addr, sz); + if (ret) + goto out; + + /* + * If a range matches backup region, adjust offset to backup + * segment. 
+ */ + ehdr = (Elf64_Ehdr *)*addr; + phdr = (Elf64_Phdr *)(ehdr + 1); + for (i = 0; i < ehdr->e_phnum; phdr++, i++) + if (phdr->p_type == PT_LOAD && + phdr->p_paddr == image->arch.backup_src_start && + phdr->p_memsz == image->arch.backup_src_sz) { + phdr->p_offset = image->arch.backup_load_addr; + break; + } +out: + vfree(cmem); return ret; } @@ -547,14 +337,14 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem, /* Exclude Backup region */ start = image->arch.backup_load_addr; end = start + image->arch.backup_src_sz - 1; - ret = exclude_mem_range(cmem, start, end); + ret = crash_exclude_mem_range(cmem, start, end); if (ret) return ret; /* Exclude elf header region */ start = image->arch.elf_load_addr; end = start + image->arch.elf_headers_sz - 1; - return exclude_mem_range(cmem, start, end); + return crash_exclude_mem_range(cmem, start, end); } /* Prepare memory map for crash dump kernel */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 45fb4d2565f8..328d027d829d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -150,6 +150,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } + if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HVS"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->hyperv_stimer0_count); + seq_puts(p, " Hyper-V stimer0 interrupts\n"); + } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index fb095ba0c02f..3182908b7e6c 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -334,7 +334,6 @@ static void *bzImage64_load(struct kimage *image, char *kernel, unsigned long setup_header_size, params_cmdline_sz; struct boot_params *params; unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr; - unsigned long purgatory_load_addr; struct bzimage64_data *ldata; struct kexec_entry64_regs regs64; void *stack; @@ -342,6 +341,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel, unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset; struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX, .top_down = true }; + struct kexec_buf pbuf = { .image = image, .buf_min = MIN_PURGATORY_ADDR, + .buf_max = ULONG_MAX, .top_down = true }; header = (struct setup_header *)(kernel + setup_hdr_offset); setup_sects = header->setup_sects; @@ -379,14 +380,13 @@ static void *bzImage64_load(struct kimage *image, char *kernel, * Load purgatory. For 64bit entry point, purgatory code can be * anywhere. 
*/ - ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1, - &purgatory_load_addr); + ret = kexec_load_purgatory(image, &pbuf); if (ret) { pr_err("Loading purgatory failed\n"); return ERR_PTR(ret); } - pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); /* @@ -538,7 +538,7 @@ static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len) } #endif -struct kexec_file_ops kexec_bzImage64_ops = { +const struct kexec_file_ops kexec_bzImage64_ops = { .probe = bzImage64_probe, .load = bzImage64_load, .cleanup = bzImage64_cleanup, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index fae86e36e399..7867417cfaff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void) } #ifdef CONFIG_SMP +static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + static_branch_disable(&virt_spin_lock_key); +} + static void __init kvm_smp_prepare_boot_cpu(void) { /* @@ -546,6 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -556,6 +564,7 @@ static void __init kvm_guest_init(void) kvm_setup_vsyscall_timeinfo(); #ifdef CONFIG_SMP + smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) @@ -605,6 +614,11 @@ unsigned int kvm_arch_para_features(void) return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +unsigned int kvm_arch_para_hints(void) +{ + return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); +} + static uint32_t __init kvm_detect(void) { return kvm_cpuid_base(); @@ -635,6 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -730,6 +745,9 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + return; + __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 93bd4fb603d1..a5e55d832d0a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -30,8 +30,9 @@ #include <asm/set_memory.h> #ifdef CONFIG_KEXEC_FILE -static struct kexec_file_ops *kexec_file_loaders[] = { +const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, + NULL }; #endif @@ -364,27 +365,6 @@ void arch_crash_save_vmcoreinfo(void) /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) -{ - int i, ret = -ENOEXEC; - struct kexec_file_ops *fops; - - for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { - fops = kexec_file_loaders[i]; - if (!fops || !fops->probe) - continue; - - ret = fops->probe(buf, 
buf_len); - if (!ret) { - image->fops = fops; - return ret; - } - } - - return ret; -} - void *arch_kexec_kernel_image_load(struct kimage *image) { vfree(image->arch.elf_headers); @@ -399,88 +379,53 @@ void *arch_kexec_kernel_image_load(struct kimage *image) image->cmdline_buf_len); } -int arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - if (!image->fops || !image->fops->cleanup) - return 0; - - return image->fops->cleanup(image->image_loader_data); -} - -#ifdef CONFIG_KEXEC_VERIFY_SIG -int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel, - unsigned long kernel_len) -{ - if (!image->fops || !image->fops->verify_sig) { - pr_debug("kernel loader does not support signature verification."); - return -EKEYREJECTED; - } - - return image->fops->verify_sig(kernel, kernel_len); -} -#endif - /* * Apply purgatory relocations. * - * ehdr: Pointer to elf headers - * sechdrs: Pointer to section headers. - * relsec: section index of SHT_RELA section. + * @pi: Purgatory to be relocated. + * @section: Section relocations applying to. + * @relsec: Section containing RELAs. + * @symtabsec: Corresponding symtab. * * TODO: Some of the code belongs to generic code. Move that in kexec.c. */ -int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, - Elf64_Shdr *sechdrs, unsigned int relsec) +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, const Elf_Shdr *relsec, + const Elf_Shdr *symtabsec) { unsigned int i; Elf64_Rela *rel; Elf64_Sym *sym; void *location; - Elf64_Shdr *section, *symtabsec; unsigned long address, sec_base, value; const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; - /* - * ->sh_offset has been modified to keep the pointer to section - * contents in memory - */ - rel = (void *)sechdrs[relsec].sh_offset; - - /* Section to which relocations apply */ - section = &sechdrs[sechdrs[relsec].sh_info]; - - pr_debug("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - - /* Associated symbol table */ - symtabsec = &sechdrs[sechdrs[relsec].sh_link]; - - /* String table */ - if (symtabsec->sh_link >= ehdr->e_shnum) { - /* Invalid strtab section number */ - pr_err("Invalid string table section index %d\n", - symtabsec->sh_link); - return -ENOEXEC; - } + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtabsec->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; - strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset; + rel = (void *)pi->ehdr + relsec->sh_offset; - /* section header string table */ - shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset; + pr_debug("Applying relocate section %s to %u\n", + shstrtab + relsec->sh_name, relsec->sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + for (i = 0; i < relsec->sh_size / sizeof(*rel); i++) { /* * rel[i].r_offset contains byte offset from beginning * of section to the storage unit affected. * - * This is location to update (->sh_offset). This is temporary - * buffer where section is currently loaded. This will finally - * be loaded to a different address later, pointed to by + * This is location to update. This is temporary buffer + * where section is currently loaded. This will finally be + * loaded to a different address later, pointed to by * ->sh_addr. kexec takes care of moving it * (kexec_load_segment()). 
*/ - location = (void *)(section->sh_offset + rel[i].r_offset); + location = pi->purgatory_buf; + location += section->sh_offset; + location += rel[i].r_offset; /* Final address of the location */ address = section->sh_addr + rel[i].r_offset; @@ -491,8 +436,8 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get * these respectively. */ - sym = (Elf64_Sym *)symtabsec->sh_offset + - ELF64_R_SYM(rel[i].r_info); + sym = (void *)pi->ehdr + symtabsec->sh_offset; + sym += ELF64_R_SYM(rel[i].r_info); if (sym->st_name) name = strtab + sym->st_name; @@ -515,12 +460,12 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, if (sym->st_shndx == SHN_ABS) sec_base = 0; - else if (sym->st_shndx >= ehdr->e_shnum) { + else if (sym->st_shndx >= pi->ehdr->e_shnum) { pr_err("Invalid section %d for symbol %s\n", sym->st_shndx, name); return -ENOEXEC; } else - sec_base = sechdrs[sym->st_shndx].sh_addr; + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; value = sym->st_value; value += sec_base; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 14437116ffea..77625b60a510 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -6,7 +6,6 @@ #include <linux/bootmem.h> #include <linux/gfp.h> #include <linux/pci.h> -#include <linux/kmemleak.h> #include <asm/proto.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9eb448c7859d..4b100fe0f508 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task) save_base_legacy(task, task->thread.gsindex, GS); } +#if IS_ENABLED(CONFIG_KVM) +/* + * While a process is running,current->thread.fsbase and current->thread.gsbase + * may not match the corresponding CPU registers (see save_base_legacy()). KVM + * wants an efficient way to save and restore FSBASE and GSBASE. + * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. + */ +void save_fsgs_for_kvm(void) +{ + save_fsgs(current); +} +EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); +#endif + static __always_inline void loadseg(enum which_selector which, unsigned short sel) { diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0d930d8987cc..14c057f29979 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -26,8 +26,8 @@ static inline void signal_compat_build_tests(void) * new fields are handled in copy_siginfo_to_user32()! */ BUILD_BUG_ON(NSIGILL != 11); - BUILD_BUG_ON(NSIGFPE != 13); - BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGFPE != 15); + BUILD_BUG_ON(NSIGSEGV != 7); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); BUILD_BUG_ON(NSIGCHLD != 6); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ebda84a91510..3ab867603e81 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = { .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, + .init_after_bootmem = x86_init_noop, }, .acpi = { |
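The mshyperv.c hunks above export a small registration API for stimer0 interrupts in Direct Mode (hv_setup_stimer0_irq(), hv_remove_stimer0_irq(), and the hv_stimer0_vector_handler() entry point). The following is a minimal sketch of how a timer driver might hook into that API, assuming the HV_X64_STIMER_DIRECT_MODE_AVAILABLE check used by ms_hyperv_init_platform() above; the consumer-side names (hv_stimer0_isr, hv_stimer0_init) and the expiry counter are illustrative only, not part of the patch.

```c
/*
 * Illustrative consumer of the stimer0 Direct Mode hooks added above.
 * hv_setup_stimer0_irq()/hv_remove_stimer0_irq() come from the patch;
 * everything named *_isr/*_init here is a hypothetical driver sketch.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/mshyperv.h>

static int stimer0_irq;		/* unused on x86/x64, see hv_setup_stimer0_irq() */
static int stimer0_vector;
static unsigned long stimer0_expirations;

/* Called back from hv_stimer0_vector_handler(); the APIC is acked there */
static void hv_stimer0_isr(void)
{
	/* A real driver would expire the per-CPU clockevent here */
	stimer0_expirations++;
}

static int __init hv_stimer0_init(void)
{
	/* Direct Mode must be advertised by the hypervisor */
	if (!(ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE))
		return -ENODEV;

	return hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
				    hv_stimer0_isr);
}

static void __exit hv_stimer0_exit(void)
{
	hv_remove_stimer0_irq(stimer0_irq);
}
```

On x86 the irq value is a placeholder (the vector is what matters, since the handler is wired through the IDT gate allocated in ms_hyperv_init_platform()); the irq/vector pair exists so the same driver interface can be shared with other architectures.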