Diffstat (limited to 'arch/powerpc')
27 files changed, 244 insertions, 656 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d8dcd8820369..891cd23f9c8b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -511,6 +511,7 @@ config KEXEC_FILE select KEXEC_CORE select HAVE_IMA_KEXEC select BUILD_BIN2C + select KEXEC_ELF depends on PPC64 depends on CRYPTO=y depends on CRYPTO_SHA256=y diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index c345b79414a9..403f7e193833 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -39,13 +39,11 @@ endif uname := $(shell uname -m) KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig -ifdef CONFIG_PPC64 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) ifeq ($(new_nm),y) NM := $(NM) --synthetic endif -endif # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 9a1d2fc6ceb7..64870c7be4a3 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -11,4 +11,3 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += vtime.h generic-y += msi.h -generic-y += simd.h diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h deleted file mode 100644 index 62fd24739852..000000000000 --- a/arch/powerpc/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ - -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index bb7c8cc77f1a..04b2b927bb5a 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm, */ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) { - return rcu_dereference_raw_notrace(kvm->memslots[0]); + return rcu_dereference_raw_check(kvm->memslots[0]); } extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e6e5f59aaa97..6fb5fb4779e0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -232,11 +232,25 @@ struct revmap_entry { }; /* - * We use the top bit of each memslot->arch.rmap entry as a lock bit, - * and bit 32 as a present flag. The bottom 32 bits are the - * index in the guest HPT of a HPTE that points to the page. + * The rmap array of size number of guest pages is allocated for each memslot. + * This array is used to store usage specific information about the guest page. + * Below are the encodings of the various possible usage types. 
*/ -#define KVMPPC_RMAP_LOCK_BIT 63 +/* Free bits which can be used to define a new usage */ +#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000 +#define KVMPPC_RMAP_NESTED 0xc000000000000000 /* Nested rmap array */ +#define KVMPPC_RMAP_HPT 0x0100000000000000 /* HPT guest */ + +/* + * rmap usage definition for a hash page table (hpt) guest: + * 0x0000080000000000 Lock bit + * 0x0000018000000000 RC bits + * 0x0000000100000000 Present bit + * 0x00000000ffffffff HPT index bits + * The bottom 32 bits are the index in the guest HPT of a HPTE that points to + * the page. + */ +#define KVMPPC_RMAP_LOCK_BIT 43 #define KVMPPC_RMAP_RC_SHIFT 32 #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) #define KVMPPC_RMAP_PRESENT 0x100000000ul diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2484e6a8f5ca..8e8514efb124 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -598,6 +598,7 @@ extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val); extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val); +extern bool kvmppc_xive_native_supported(void); #else static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index e4016985764e..818989e11678 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -46,7 +46,15 @@ struct xive_irq_data { /* Setup/used by frontend */ int target; + /* + * saved_p means that there is a queue entry for this interrupt + * in some CPU's queue (not including guest vcpu queues), even + * if P is not set in the source ESB. + * stale_p means that there is no queue entry for this interrupt + * in some CPU's queue, even if P is set in the source ESB. + */ bool saved_p; + bool stale_p; }; #define XIVE_IRQ_FLAG_STORE_EOI 0x01 #define XIVE_IRQ_FLAG_LSI 0x02 @@ -127,6 +135,7 @@ extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, u32 qindex); extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); +extern bool xive_native_has_queue_state_support(void); #else diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 83cf7b852876..3072fd6dbe94 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -23,541 +23,6 @@ #include <linux/slab.h> #include <linux/types.h> -#define PURGATORY_STACK_SIZE (16 * 1024) - -#define elf_addr_to_cpu elf64_to_cpu - -#ifndef Elf_Rel -#define Elf_Rel Elf64_Rel -#endif /* Elf_Rel */ - -struct elf_info { - /* - * Where the ELF binary contents are kept. - * Memory managed by the user of the struct. 
- */ - const char *buffer; - - const struct elfhdr *ehdr; - const struct elf_phdr *proghdrs; - struct elf_shdr *sechdrs; -}; - -static inline bool elf_is_elf_file(const struct elfhdr *ehdr) -{ - return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; -} - -static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) -{ - if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - value = le64_to_cpu(value); - else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - value = be64_to_cpu(value); - - return value; -} - -static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value) -{ - if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - value = le16_to_cpu(value); - else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - value = be16_to_cpu(value); - - return value; -} - -static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value) -{ - if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - value = le32_to_cpu(value); - else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - value = be32_to_cpu(value); - - return value; -} - -/** - * elf_is_ehdr_sane - check that it is safe to use the ELF header - * @buf_len: size of the buffer in which the ELF file is loaded. - */ -static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len) -{ - if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) { - pr_debug("Bad program header size.\n"); - return false; - } else if (ehdr->e_shnum > 0 && - ehdr->e_shentsize != sizeof(struct elf_shdr)) { - pr_debug("Bad section header size.\n"); - return false; - } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT || - ehdr->e_version != EV_CURRENT) { - pr_debug("Unknown ELF version.\n"); - return false; - } - - if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { - size_t phdr_size; - - /* - * e_phnum is at most 65535 so calculating the size of the - * program header cannot overflow. - */ - phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; - - /* Sanity check the program header table location. */ - if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) { - pr_debug("Program headers at invalid location.\n"); - return false; - } else if (ehdr->e_phoff + phdr_size > buf_len) { - pr_debug("Program headers truncated.\n"); - return false; - } - } - - if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { - size_t shdr_size; - - /* - * e_shnum is at most 65536 so calculating - * the size of the section header cannot overflow. - */ - shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum; - - /* Sanity check the section header table location. 
*/ - if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) { - pr_debug("Section headers at invalid location.\n"); - return false; - } else if (ehdr->e_shoff + shdr_size > buf_len) { - pr_debug("Section headers truncated.\n"); - return false; - } - } - - return true; -} - -static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr) -{ - struct elfhdr *buf_ehdr; - - if (len < sizeof(*buf_ehdr)) { - pr_debug("Buffer is too small to hold ELF header.\n"); - return -ENOEXEC; - } - - memset(ehdr, 0, sizeof(*ehdr)); - memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident)); - if (!elf_is_elf_file(ehdr)) { - pr_debug("No ELF header magic.\n"); - return -ENOEXEC; - } - - if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) { - pr_debug("Not a supported ELF class.\n"); - return -ENOEXEC; - } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && - ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { - pr_debug("Not a supported ELF data format.\n"); - return -ENOEXEC; - } - - buf_ehdr = (struct elfhdr *) buf; - if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) { - pr_debug("Bad ELF header size.\n"); - return -ENOEXEC; - } - - ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type); - ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine); - ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version); - ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry); - ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff); - ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff); - ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags); - ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize); - ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum); - ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize); - ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum); - ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx); - - return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC; -} - -/** - * elf_is_phdr_sane - check that it is safe to use the program header - * @buf_len: size of the buffer in which the ELF file is loaded. - */ -static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len) -{ - - if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) { - pr_debug("ELF segment location wraps around.\n"); - return false; - } else if (phdr->p_offset + phdr->p_filesz > buf_len) { - pr_debug("ELF segment not in file.\n"); - return false; - } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) { - pr_debug("ELF segment address wraps around.\n"); - return false; - } - - return true; -} - -static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info, - int idx) -{ - /* Override the const in proghdrs, we are the ones doing the loading. */ - struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx]; - const char *pbuf; - struct elf_phdr *buf_phdr; - - pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr)); - buf_phdr = (struct elf_phdr *) pbuf; - - phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type); - phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset); - phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr); - phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr); - phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags); - - /* - * The following fields have a type equivalent to Elf_Addr - * both in 32 bit and 64 bit ELF. 
- */ - phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz); - phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz); - phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align); - - return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC; -} - -/** - * elf_read_phdrs - read the program headers from the buffer - * - * This function assumes that the program header table was checked for sanity. - * Use elf_is_ehdr_sane() if it wasn't. - */ -static int elf_read_phdrs(const char *buf, size_t len, - struct elf_info *elf_info) -{ - size_t phdr_size, i; - const struct elfhdr *ehdr = elf_info->ehdr; - - /* - * e_phnum is at most 65535 so calculating the size of the - * program header cannot overflow. - */ - phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; - - elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL); - if (!elf_info->proghdrs) - return -ENOMEM; - - for (i = 0; i < ehdr->e_phnum; i++) { - int ret; - - ret = elf_read_phdr(buf, len, elf_info, i); - if (ret) { - kfree(elf_info->proghdrs); - elf_info->proghdrs = NULL; - return ret; - } - } - - return 0; -} - -/** - * elf_is_shdr_sane - check that it is safe to use the section header - * @buf_len: size of the buffer in which the ELF file is loaded. - */ -static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len) -{ - bool size_ok; - - /* SHT_NULL headers have undefined values, so we can't check them. */ - if (shdr->sh_type == SHT_NULL) - return true; - - /* Now verify sh_entsize */ - switch (shdr->sh_type) { - case SHT_SYMTAB: - size_ok = shdr->sh_entsize == sizeof(Elf_Sym); - break; - case SHT_RELA: - size_ok = shdr->sh_entsize == sizeof(Elf_Rela); - break; - case SHT_DYNAMIC: - size_ok = shdr->sh_entsize == sizeof(Elf_Dyn); - break; - case SHT_REL: - size_ok = shdr->sh_entsize == sizeof(Elf_Rel); - break; - case SHT_NOTE: - case SHT_PROGBITS: - case SHT_HASH: - case SHT_NOBITS: - default: - /* - * This is a section whose entsize requirements - * I don't care about. If I don't know about - * the section I can't care about it's entsize - * requirements. - */ - size_ok = true; - break; - } - - if (!size_ok) { - pr_debug("ELF section with wrong entry size.\n"); - return false; - } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) { - pr_debug("ELF section address wraps around.\n"); - return false; - } - - if (shdr->sh_type != SHT_NOBITS) { - if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) { - pr_debug("ELF section location wraps around.\n"); - return false; - } else if (shdr->sh_offset + shdr->sh_size > buf_len) { - pr_debug("ELF section not in file.\n"); - return false; - } - } - - return true; -} - -static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info, - int idx) -{ - struct elf_shdr *shdr = &elf_info->sechdrs[idx]; - const struct elfhdr *ehdr = elf_info->ehdr; - const char *sbuf; - struct elf_shdr *buf_shdr; - - sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr); - buf_shdr = (struct elf_shdr *) sbuf; - - shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name); - shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type); - shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr); - shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset); - shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link); - shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info); - - /* - * The following fields have a type equivalent to Elf_Addr - * both in 32 bit and 64 bit ELF. 
- */ - shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags); - shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size); - shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign); - shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize); - - return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC; -} - -/** - * elf_read_shdrs - read the section headers from the buffer - * - * This function assumes that the section header table was checked for sanity. - * Use elf_is_ehdr_sane() if it wasn't. - */ -static int elf_read_shdrs(const char *buf, size_t len, - struct elf_info *elf_info) -{ - size_t shdr_size, i; - - /* - * e_shnum is at most 65536 so calculating - * the size of the section header cannot overflow. - */ - shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum; - - elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL); - if (!elf_info->sechdrs) - return -ENOMEM; - - for (i = 0; i < elf_info->ehdr->e_shnum; i++) { - int ret; - - ret = elf_read_shdr(buf, len, elf_info, i); - if (ret) { - kfree(elf_info->sechdrs); - elf_info->sechdrs = NULL; - return ret; - } - } - - return 0; -} - -/** - * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info - * @buf: Buffer to read ELF file from. - * @len: Size of @buf. - * @ehdr: Pointer to existing struct which will be populated. - * @elf_info: Pointer to existing struct which will be populated. - * - * This function allows reading ELF files with different byte order than - * the kernel, byte-swapping the fields as needed. - * - * Return: - * On success returns 0, and the caller should call elf_free_info(elf_info) to - * free the memory allocated for the section and program headers. - */ -int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr, - struct elf_info *elf_info) -{ - int ret; - - ret = elf_read_ehdr(buf, len, ehdr); - if (ret) - return ret; - - elf_info->buffer = buf; - elf_info->ehdr = ehdr; - if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { - ret = elf_read_phdrs(buf, len, elf_info); - if (ret) - return ret; - } - if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { - ret = elf_read_shdrs(buf, len, elf_info); - if (ret) { - kfree(elf_info->proghdrs); - return ret; - } - } - - return 0; -} - -/** - * elf_free_info - free memory allocated by elf_read_from_buffer - */ -void elf_free_info(struct elf_info *elf_info) -{ - kfree(elf_info->proghdrs); - kfree(elf_info->sechdrs); - memset(elf_info, 0, sizeof(*elf_info)); -} -/** - * build_elf_exec_info - read ELF executable and check that we can use it - */ -static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr, - struct elf_info *elf_info) -{ - int i; - int ret; - - ret = elf_read_from_buffer(buf, len, ehdr, elf_info); - if (ret) - return ret; - - /* Big endian vmlinux has type ET_DYN. */ - if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) { - pr_err("Not an ELF executable.\n"); - goto error; - } else if (!elf_info->proghdrs) { - pr_err("No ELF program header.\n"); - goto error; - } - - for (i = 0; i < ehdr->e_phnum; i++) { - /* - * Kexec does not support loading interpreters. - * In addition this check keeps us from attempting - * to kexec ordinay executables. 
- */ - if (elf_info->proghdrs[i].p_type == PT_INTERP) { - pr_err("Requires an ELF interpreter.\n"); - goto error; - } - } - - return 0; -error: - elf_free_info(elf_info); - return -ENOEXEC; -} - -static int elf64_probe(const char *buf, unsigned long len) -{ - struct elfhdr ehdr; - struct elf_info elf_info; - int ret; - - ret = build_elf_exec_info(buf, len, &ehdr, &elf_info); - if (ret) - return ret; - - elf_free_info(&elf_info); - - return elf_check_arch(&ehdr) ? 0 : -ENOEXEC; -} - -/** - * elf_exec_load - load ELF executable image - * @lowest_load_addr: On return, will be the address where the first PT_LOAD - * section will be loaded in memory. - * - * Return: - * 0 on success, negative value on failure. - */ -static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, - struct elf_info *elf_info, - unsigned long *lowest_load_addr) -{ - unsigned long base = 0, lowest_addr = UINT_MAX; - int ret; - size_t i; - struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size, - .top_down = false }; - - /* Read in the PT_LOAD segments. */ - for (i = 0; i < ehdr->e_phnum; i++) { - unsigned long load_addr; - size_t size; - const struct elf_phdr *phdr; - - phdr = &elf_info->proghdrs[i]; - if (phdr->p_type != PT_LOAD) - continue; - - size = phdr->p_filesz; - if (size > phdr->p_memsz) - size = phdr->p_memsz; - - kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; - kbuf.bufsz = size; - kbuf.memsz = phdr->p_memsz; - kbuf.buf_align = phdr->p_align; - kbuf.buf_min = phdr->p_paddr + base; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - load_addr = kbuf.mem; - - if (load_addr < lowest_addr) - lowest_addr = load_addr; - } - - /* Update entry point to reflect new load address. */ - ehdr->e_entry += base; - - *lowest_load_addr = lowest_addr; - ret = 0; - out: - return ret; -} - static void *elf64_load(struct kimage *image, char *kernel_buf, unsigned long kernel_len, char *initrd, unsigned long initrd_len, char *cmdline, @@ -570,18 +35,18 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, void *fdt; const void *slave_code; struct elfhdr ehdr; - struct elf_info elf_info; + struct kexec_elf_info elf_info; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; struct kexec_buf pbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size, .top_down = true, .mem = KEXEC_BUF_MEM_UNKNOWN }; - ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); if (ret) goto out; - ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr); + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; @@ -648,13 +113,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_err("Error setting up the purgatory.\n"); out: - elf_free_info(&elf_info); + kexec_free_elf_info(&elf_info); /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ return ret ? 
ERR_PTR(ret) : fdt; } const struct kexec_file_ops kexec_elf64_ops = { - .probe = elf64_probe, + .probe = kexec_elf_probe, .load = elf64_load, }; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8fc4de0d22b4..7a84c9f1778e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -101,21 +101,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk) } } -static bool tm_active_with_fp(struct task_struct *tsk) -{ - return MSR_TM_ACTIVE(tsk->thread.regs->msr) && - (tsk->thread.ckpt_regs.msr & MSR_FP); -} - -static bool tm_active_with_altivec(struct task_struct *tsk) -{ - return MSR_TM_ACTIVE(tsk->thread.regs->msr) && - (tsk->thread.ckpt_regs.msr & MSR_VEC); -} #else static inline void check_if_tm_restore_required(struct task_struct *tsk) { } -static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; } -static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ bool strict_msr_control; @@ -252,7 +239,7 @@ EXPORT_SYMBOL(enable_kernel_fp); static int restore_fp(struct task_struct *tsk) { - if (tsk->thread.load_fp || tm_active_with_fp(tsk)) { + if (tsk->thread.load_fp) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; return 1; @@ -334,8 +321,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); static int restore_altivec(struct task_struct *tsk) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && - (tsk->thread.load_vec || tm_active_with_altivec(tsk))) { + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { load_vr_state(&tsk->thread.vr_state); tsk->thread.used_vr = 1; tsk->thread.load_vec++; @@ -497,13 +483,14 @@ void giveup_all(struct task_struct *tsk) if (!tsk->thread.regs) return; + check_if_tm_restore_required(tsk); + usermsr = tsk->thread.regs->msr; if ((usermsr & msr_all_available) == 0) return; msr_check_and_set(msr_all_available); - check_if_tm_restore_required(tsk); WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1f8db666468d..5e6543aba1b3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -778,12 +778,6 @@ void ppc_printk_progress(char *s, unsigned short hex) pr_info("%s\n", s); } -void arch_setup_pdev_archdata(struct platform_device *pdev) -{ - pdev->archdata.dma_mask = DMA_BIT_MASK(32); - pdev->dev.dma_mask = &pdev->archdata.dma_mask; -} - static __init void print_system_info(void) { pr_info("-----------------------------------------------------\n"); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9524d92bc45d..d7fcdfa7fee4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void) if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); - kvmppc_xive_native_init_module(); - kvm_register_device_ops(&kvm_xive_native_ops, - KVM_DEV_TYPE_XIVE); + if (kvmppc_xive_native_supported()) { + kvmppc_xive_native_init_module(); + kvm_register_device_ops(&kvm_xive_native_ops, + KVM_DEV_TYPE_XIVE); + } } else #endif kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index e99a14798ab0..c4b606fe73eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -660,8 +660,10 @@ long 
kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, } tce = be64_to_cpu(tce); - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) - return H_PARAMETER; + if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index f50bbeedfc66..b4f20f13b860 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -556,8 +556,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) - return H_PARAMETER; + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + ret = H_PARAMETER; + goto unlock_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cde3f5a4b3e4..f8975c620f41 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.pspb); break; case KVM_REG_PPC_DPDES: - *val = get_reg_val(id, vcpu->arch.vcore->dpdes); + /* + * On POWER9, where we are emulating msgsndp etc., + * we return 1 bit for each vcpu, which can come from + * either vcore->dpdes or doorbell_request. + * On POWER8, doorbell_request is 0. + */ + *val = get_reg_val(id, vcpu->arch.vcore->dpdes | + vcpu->arch.doorbell_request); break; case KVM_REG_PPC_VTB: *val = get_reg_val(id, vcpu->arch.vcore->vtb); @@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) if (!spin_trylock(&pvc->lock)) continue; prepare_threads(pvc); - if (!pvc->n_runnable) { + if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) { list_del_init(&pvc->preempt_list); if (pvc->runner == NULL) { pvc->vcore_state = VCORE_INACTIVE; @@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&lp->lock); } -static bool recheck_signals(struct core_info *cip) +static bool recheck_signals_and_mmu(struct core_info *cip) { int sub, i; struct kvm_vcpu *vcpu; + struct kvmppc_vcore *vc; - for (sub = 0; sub < cip->n_subcores; ++sub) - for_each_runnable_thread(i, vcpu, cip->vc[sub]) + for (sub = 0; sub < cip->n_subcores; ++sub) { + vc = cip->vc[sub]; + if (!vc->kvm->arch.mmu_ready) + return true; + for_each_runnable_thread(i, vcpu, vc) if (signal_pending(vcpu->arch.run_task)) return true; + } return false; } @@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) local_irq_disable(); hard_irq_disable(); if (lazy_irq_pending() || need_resched() || - recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) { + recheck_signals_and_mmu(&core_info)) { local_irq_enable(); vc->vcore_state = VCORE_INACTIVE; /* Unlock all except the primary vcore */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 63e0ce91e29d..7186c65c61c9 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } else { rev->forw = rev->back = pte_index; *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | - pte_index | KVMPPC_RMAP_PRESENT; + pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; } unlock_rmap(rmap); } 
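[Editorial note] The kvm_host.h hunk earlier in this patch moves the rmap lock bit from 63 to 43 and reserves the top byte of each memslot->arch.rmap entry as a usage-type field, and the book3s_hv_rm_mmu.c hunk just above tags HPT entries with KVMPPC_RMAP_HPT when chaining a new HPTE. A minimal userspace-style sketch of that bit layout follows; it is illustrative only, and KVMPPC_RMAP_INDEX is assumed here to be the low 32 bits, as the "HPT index bits" line in the new comment implies.

/*
 * Standalone sketch (not kernel code): decode one rmap entry using the
 * field layout described in the kvm_host.h hunk of this patch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define KVMPPC_RMAP_TYPE_MASK  0xff00000000000000ULL  /* usage type (top byte) */
#define KVMPPC_RMAP_NESTED     0xc000000000000000ULL  /* nested rmap array */
#define KVMPPC_RMAP_HPT        0x0100000000000000ULL  /* HPT guest */
#define KVMPPC_RMAP_LOCK_BIT   43                     /* was bit 63 before this patch */
#define KVMPPC_RMAP_PRESENT    0x0000000100000000ULL
#define KVMPPC_RMAP_INDEX      0x00000000ffffffffULL  /* assumed: guest HPT index bits */

static void decode_rmap(uint64_t rmap)
{
	bool hpt     = (rmap & KVMPPC_RMAP_TYPE_MASK) == KVMPPC_RMAP_HPT;
	bool locked  = rmap & (1ULL << KVMPPC_RMAP_LOCK_BIT);
	bool present = rmap & KVMPPC_RMAP_PRESENT;
	uint32_t idx = rmap & KVMPPC_RMAP_INDEX;

	printf("hpt=%d locked=%d present=%d hpt_index=%u\n",
	       hpt, locked, present, (unsigned int)idx);
}

int main(void)
{
	/* Entry as written by kvmppc_add_revmap_chain() for pte_index 42 */
	decode_rmap(KVMPPC_RMAP_HPT | KVMPPC_RMAP_PRESENT | 42);
	return 0;
}
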
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 337e64468d78..07181d0dfcb7 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS lwz r8, VCPU_XIVE_CAM_WORD(r4) + cmpwi r8, 0 + beq no_xive li r7, TM_QW1_OS + TM_WORD2 mfmsr r0 andi. r0, r0, MSR_DR /* in real mode? */ @@ -2831,29 +2833,39 @@ kvm_cede_prodded: kvm_cede_exit: ld r9, HSTATE_KVM_VCPU(r13) #ifdef CONFIG_KVM_XICS - /* Abort if we still have a pending escalation */ + /* are we using XIVE with single escalation? */ + ld r10, VCPU_XIVE_ESC_VADDR(r9) + cmpdi r10, 0 + beq 3f + li r6, XIVE_ESB_SET_PQ_00 + /* + * If we still have a pending escalation, abort the cede, + * and we must set PQ to 10 rather than 00 so that we don't + * potentially end up with two entries for the escalation + * interrupt in the XIVE interrupt queue. In that case + * we also don't want to set xive_esc_on to 1 here in + * case we race with xive_esc_irq(). + */ lbz r5, VCPU_XIVE_ESC_ON(r9) cmpwi r5, 0 - beq 1f + beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) -1: /* Enable XIVE escalation */ - li r5, XIVE_ESB_SET_PQ_00 + li r6, XIVE_ESB_SET_PQ_10 + b 5f +4: li r0, 1 + stb r0, VCPU_XIVE_ESC_ON(r9) + /* make sure store to xive_esc_on is seen before xive_esc_irq runs */ + sync +5: /* Enable XIVE escalation */ mfmsr r0 andi. r0, r0, MSR_DR /* in real mode? */ beq 1f - ld r10, VCPU_XIVE_ESC_VADDR(r9) - cmpdi r10, 0 - beq 3f - ldx r0, r10, r5 + ldx r0, r10, r6 b 2f 1: ld r10, VCPU_XIVE_ESC_RADDR(r9) - cmpdi r10, 0 - beq 3f - ldcix r0, r10, r5 + ldcix r0, r10, r6 2: sync - li r0, 1 - stb r0, VCPU_XIVE_ESC_ON(r9) #endif /* CONFIG_KVM_XICS */ 3: b guest_exit_cont diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e3ba67095895..591bfb4bfd0f 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; u64 pq; - if (!tima) + /* + * Nothing to do if the platform doesn't have a XIVE + * or this vCPU doesn't have its own XIVE context + * (e.g. because it's not using an in-kernel interrupt controller). + */ + if (!tima || !vcpu->arch.xive_cam_word) return; + eieio(); __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); @@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data) */ vcpu->arch.xive_esc_on = false; + /* This orders xive_esc_on = false vs. subsequent stale_p = true */ + smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */ + return IRQ_HANDLED; } @@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) vcpu->arch.xive_esc_raddr = 0; } +/* + * In single escalation mode, the escalation interrupt is marked so + * that EOI doesn't re-enable it, but just sets the stale_p flag to + * indicate that the P bit has already been dealt with. However, the + * assembly code that enters the guest sets PQ to 00 without clearing + * stale_p (because it has no easy way to address it). Hence we have + * to adjust stale_p before shutting down the interrupt. 
+ */ +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, + struct kvmppc_xive_vcpu *xc, int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + + /* + * This slightly odd sequence gives the right result + * (i.e. stale_p set if xive_esc_on is false) even if + * we race with xive_esc_irq() and xive_irq_eoi(). + */ + xd->stale_p = false; + smp_mb(); /* paired with smb_wmb in xive_esc_irq */ + if (!vcpu->arch.xive_esc_on) + xd->stale_p = true; +} + void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Mask the VP IPI */ xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01); - /* Disable the VP */ - xive_native_disable_vp(xc->vp_id); - - /* Free the queues & associated interrupts */ + /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { - struct xive_q *q = &xc->queues[i]; - - /* Free the escalation irq */ if (xc->esc_virq[i]) { + if (xc->xive->single_escalation) + xive_cleanup_single_escalation(vcpu, xc, + xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); } - /* Free the queue */ + } + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + + /* Clear the cam word so guest entry won't try to push context */ + vcpu->arch.xive_cam_word = 0; + + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + xive_native_disable_queue(xc->vp_id, q, i); if (q->qpage) { free_pages((unsigned long)q->qpage, diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 50494d0ee375..955b820ffd6d 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, + struct kvmppc_xive_vcpu *xc, int irq); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index a998823f68a3..248c1ea9e788 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) xc->valid = false; kvmppc_xive_disable_vcpu_interrupts(vcpu); - /* Disable the VP */ - xive_native_disable_vp(xc->vp_id); - - /* Free the queues & associated interrupts */ + /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { /* Free the escalation irq */ if (xc->esc_virq[i]) { + if (xc->xive->single_escalation) + xive_cleanup_single_escalation(vcpu, xc, + xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); xc->esc_virq[i] = 0; } + } + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + + /* Clear the cam word so guest entry won't try to push context */ + vcpu->arch.xive_cam_word = 0; - /* Free the queue */ + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { kvmppc_xive_native_cleanup_queue(vcpu, i); } @@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) return 0; } +bool kvmppc_xive_native_supported(void) +{ + 
return xive_native_has_queue_state_support(); +} + static int xive_native_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b5a848a55504..00649ca5fa9a 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -440,6 +440,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, struct kvm_vcpu *vcpu; int err; + BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu_e500) { err = -ENOMEM; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index bb4d09c1ad56..6fca38ca791f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) */ if (inst == KVMPPC_INST_SW_BREAKPOINT) { run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.status = 0; run->debug.arch.address = kvmppc_get_pc(vcpu); emulated = EMULATE_EXIT_USER; advance = 0; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 9208c82ed08d..2e496eb86e94 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) rs = get_rs(inst); rt = get_rt(inst); - /* - * if mmio_vsx_tx_sx_enabled == 0, copy data between - * VSR[0..31] and memory - * if mmio_vsx_tx_sx_enabled == 1, copy data between - * VSR[32..63] and memory - */ vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3e566c2e6066..3a77bb643452 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -561,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * a POWER9 processor) and the PowerNV platform, as * nested is not yet supported. */ - r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); + r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) && + kvmppc_xive_native_supported(); break; #endif diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index d4acf6fa0596..bf60983a58c7 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -630,7 +630,6 @@ static void early_init_this_mmu(void) #ifdef CONFIG_PPC_FSL_BOOK3E if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; - int __maybe_unused cpu = smp_processor_id(); bool map = true; /* use a quarter of the TLBCAM for bolted linear map */ diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1cdb39575eae..be86fce1a84e 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek) static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) { u32 irq = 0; - u8 prio; + u8 prio = 0; /* Find highest pending priority */ while (xc->pending_prio != 0) { @@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) irq = xive_read_eq(&xc->queue[prio], just_peek); /* Found something ? 
That's it */ - if (irq) - break; + if (irq) { + if (just_peek || irq_to_desc(irq)) + break; + /* + * We should never get here; if we do then we must + * have failed to synchronize the interrupt properly + * when shutting it down. + */ + pr_crit("xive: got interrupt %d without descriptor, dropping\n", + irq); + WARN_ON(1); + continue; + } /* Clear pending bits */ xc->pending_prio &= ~(1 << prio); @@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) */ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) { + xd->stale_p = false; /* If the XIVE supports the new "store EOI facility, use it */ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); @@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) } } -/* irq_chip eoi callback */ +/* irq_chip eoi callback, called with irq descriptor lock held */ static void xive_irq_eoi(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d) if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && !(xd->flags & XIVE_IRQ_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); + else + xd->stale_p = true; /* * Clear saved_p to indicate that it's no longer occupying @@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd, */ if (mask) { val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01); - xd->saved_p = !!(val & XIVE_ESB_VAL_P); - } else if (xd->saved_p) + if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P)) + xd->saved_p = true; + xd->stale_p = false; + } else if (xd->saved_p) { xive_esb_read(xd, XIVE_ESB_SET_PQ_10); - else + xd->saved_p = false; + } else { xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + xd->stale_p = false; + } } /* @@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(struct irq_data *d) unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int target, rc; + xd->saved_p = false; + xd->stale_p = false; pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", d->irq, hw_irq, d); @@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(struct irq_data *d) return 0; } +/* called with irq descriptor lock held */ static void xive_irq_shutdown(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -602,16 +624,6 @@ static void xive_irq_shutdown(struct irq_data *d) xive_do_source_set_mask(xd, true); /* - * The above may have set saved_p. We clear it otherwise it - * will prevent re-enabling later on. It is ok to forget the - * fact that the interrupt might be in a queue because we are - * accounting that already in xive_dec_target_count() and will - * be re-routing it to a new queue with proper accounting when - * it's started up again - */ - xd->saved_p = false; - - /* * Mask the interrupt in HW in the IVT/EAS and set the number * to be the "bad" IRQ number */ @@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq_data *d) return 1; } +/* + * Caller holds the irq descriptor lock, so this won't be called + * concurrently with xive_get_irqchip_state on the same interrupt. + */ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); @@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) /* Set it to PQ=10 state to prevent further sends */ pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10); + if (!xd->stale_p) { + xd->saved_p = !!(pq & XIVE_ESB_VAL_P); + xd->stale_p = !xd->saved_p; + } /* No target ? 
nothing to do */ if (xd->target == XIVE_INVALID_TARGET) { @@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * An untargetted interrupt should have been * also masked at the source */ - WARN_ON(pq & 2); + WARN_ON(xd->saved_p); return 0; } @@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * This saved_p is cleared by the host EOI, when we know * for sure the queue slot is no longer in use. */ - if (pq & 2) { - pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - xd->saved_p = true; + if (xd->saved_p) { + xive_esb_read(xd, XIVE_ESB_SET_PQ_11); /* * Sync the XIVE source HW to ensure the interrupt @@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) */ if (xive_ops->sync_source) xive_ops->sync_source(hw_irq); - } else - xd->saved_p = false; + } } else { irqd_clr_forwarded_to_vcpu(d); @@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) return 0; } +/* Called with irq descriptor lock held. */ +static int xive_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, bool *state) +{ + struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + + switch (which) { + case IRQCHIP_STATE_ACTIVE: + *state = !xd->stale_p && + (xd->saved_p || + !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + return 0; + default: + return -EINVAL; + } +} + static struct irq_chip xive_irq_chip = { .name = "XIVE-IRQ", .irq_startup = xive_irq_startup, @@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = { .irq_set_type = xive_irq_set_type, .irq_retrigger = xive_irq_retrigger, .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity, + .irq_get_irqchip_state = xive_get_irqchip_state, }; bool is_xive_irq(struct irq_chip *chip) @@ -1338,6 +1374,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) xd = irq_desc_get_handler_data(desc); /* + * Clear saved_p to indicate that it's no longer pending + */ + xd->saved_p = false; + + /* * For LSIs, we EOI, this will cause a resend if it's * still asserted. Otherwise do an MSI retrigger. */ diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 2f26b74f6cfa..37987c815913 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -800,6 +800,13 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) } EXPORT_SYMBOL_GPL(xive_native_set_queue_state); +bool xive_native_has_queue_state_support(void) +{ + return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) && + opal_check_token(OPAL_XIVE_SET_QUEUE_STATE); +} +EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support); + int xive_native_get_vp_state(u32 vp_id, u64 *out_state) { __be64 state; |
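
[Editorial note] The sysdev/xive/common.c hunks above add a stale_p flag alongside saved_p and use the pair, together with the ESB P bit, to report IRQCHIP_STATE_ACTIVE from the new xive_get_irqchip_state() callback. The following is a small host-side model of that predicate, not kernel code; esb_p stands in for the xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P probe, which needs real hardware, and the struct/function names are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct xive_irq_data_model {
	bool saved_p;   /* a queue entry exists even if ESB P is clear */
	bool stale_p;   /* no queue entry even if ESB P is still set */
};

/* Mirrors the IRQCHIP_STATE_ACTIVE computation added by this patch. */
static bool irq_active(const struct xive_irq_data_model *xd, bool esb_p)
{
	return !xd->stale_p && (xd->saved_p || esb_p);
}

int main(void)
{
	/* Interrupt forwarded to a vcpu with P captured into saved_p */
	struct xive_irq_data_model forwarded = { .saved_p = true,  .stale_p = false };
	/* Interrupt shut down: stale_p set so a leftover P bit is ignored */
	struct xive_irq_data_model shut_down = { .saved_p = false, .stale_p = true  };

	printf("forwarded, ESB P clear: %d\n", irq_active(&forwarded, false));
	printf("shut down, ESB P set:   %d\n", irq_active(&shut_down, true));
	return 0;
}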