diff options
Diffstat (limited to 'arch/powerpc')
53 files changed, 920 insertions, 708 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 65fba4c34cd7..c7f120aaa98f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -160,6 +160,7 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select GENERIC_CPU_AUTOPROBE select HAVE_VIRT_CPU_ACCOUNTING + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select HAVE_ARCH_HARDENED_USERCOPY select HAVE_KERNEL_GZIP diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index eae2dc8bc218..9d47f2efa830 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -100,7 +100,8 @@ src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ - uartlite.c mpc52xx-psc.c opal.c opal-calls.S + uartlite.c mpc52xx-psc.c opal.c +src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c index d7b4fd47eb44..0272570d02de 100644 --- a/arch/powerpc/boot/opal.c +++ b/arch/powerpc/boot/opal.c @@ -13,7 +13,7 @@ #include <libfdt.h> #include "../include/asm/opal-api.h" -#ifdef __powerpc64__ +#ifdef CONFIG_PPC64_BOOT_WRAPPER /* Global OPAL struct used by opal-call.S */ struct opal { diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config index efa99c048543..2fe76f5e938a 100644 --- a/arch/powerpc/configs/dpaa.config +++ b/arch/powerpc/configs/dpaa.config @@ -1 +1,4 @@ CONFIG_FSL_DPAA=y +CONFIG_FSL_PAMU=y +CONFIG_FSL_FMAN=y +CONFIG_FSL_DPAA_ETH=y diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e407af2b7333..2e6a823fa502 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -70,7 +70,9 @@ #define HPTE_V_SSIZE_SHIFT 62 #define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff) #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) +#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) @@ -80,14 +82,16 @@ #define HPTE_V_VALID ASM_CONST(0x0000000000000001) /* - * ISA 3.0 have a different HPTE format. + * ISA 3.0 has a different HPTE format. */ #define HPTE_R_3_0_SSIZE_SHIFT 58 +#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT) #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) +#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) @@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } /* + * ISA v3.0 defines a new HPTE format, which differs from the old + * format in having smaller AVPN and ARPN fields, and the B field + * in the second dword instead of the first. + */ +static inline unsigned long hpte_old_to_new_v(unsigned long v) +{ + /* trim AVPN, drop B */ + return v & HPTE_V_COMMON_BITS; +} + +static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r) +{ + /* move B field from 1st to 2nd dword, trim ARPN */ + return (r & ~HPTE_R_3_0_SSIZE_MASK) | + (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT); +} + +static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r) +{ + /* insert B field */ + return (v & HPTE_V_COMMON_BITS) | + ((r & HPTE_R_3_0_SSIZE_MASK) << + (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT)); +} + +static inline unsigned long hpte_new_to_old_r(unsigned long r) +{ + /* clear out B field */ + return r & ~HPTE_R_3_0_SSIZE_MASK; +} + +/* * This function sets the AVPN and L fields of the HPTE appropriately * using the base page size and actual page size. */ @@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize, int ssize) + int actual_psize) { - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; - /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9fd77f8794a0..0ebfbc8f0449 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1009,7 +1009,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, - struct spinlock *old_pmd_ptl) + struct spinlock *old_pmd_ptl, + struct vm_area_struct *vma) { if (radix_enabled()) return false; @@ -1020,6 +1021,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } + + +#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit +static inline bool arch_needs_pgtable_deposit(void) +{ + if (radix_enabled()) + return false; + return true; +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 4f60db074725..aa2e6a34b872 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -46,26 +46,12 @@ extern cputime_t cputime_one_jiffy; * Convert cputime <-> jiffies */ extern u64 __cputime_jiffies_factor; -DECLARE_PER_CPU(unsigned long, cputime_last_delta); -DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta); static inline unsigned long cputime_to_jiffies(const cputime_t ct) { return mulhdu((__force u64) ct, __cputime_jiffies_factor); } -/* Estimate the scaled cputime by scaling the real cputime based on - * the last scaled to real ratio */ -static inline cputime_t cputime_to_scaled(const cputime_t ct) -{ - if (cpu_has_feature(CPU_FTR_SPURR) && - __this_cpu_read(cputime_last_delta)) - return (__force u64) ct * - __this_cpu_read(cputime_scaled_last_delta) / - __this_cpu_read(cputime_last_delta); - return ct; -} - static inline cputime_t jiffies_to_cputime(const unsigned long jif) { u64 ct; diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 05cabed3d1bd..09a802bb702f 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -99,6 +99,7 @@ #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 #define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 +#define BOOK3S_INTERRUPT_H_VIRT 0xea0 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28350a294b1e..e59b172666cd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -48,7 +48,7 @@ #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ /* These values are internal and can be increased later */ #define KVM_NR_IRQCHIPS 1 @@ -244,8 +244,10 @@ struct kvm_arch_memory_slot { struct kvm_arch { unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + unsigned int tlb_sets; unsigned long hpt_virt; struct revmap_entry *revmap; + atomic64_t mmio_update; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap { #define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_XICS 2 +#define MMIO_HPTE_CACHE_SIZE 4 + +struct mmio_hpte_cache_entry { + unsigned long hpte_v; + unsigned long hpte_r; + unsigned long rpte; + unsigned long pte_index; + unsigned long eaddr; + unsigned long slb_v; + long mmio_update; + unsigned int slb_base_pshift; +}; + +struct mmio_hpte_cache { + struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE]; + unsigned int index; +}; + struct openpic; struct kvm_vcpu_arch { @@ -498,6 +518,8 @@ struct kvm_vcpu_arch { ulong tcscr; ulong acop; ulong wort; + ulong tid; + ulong psscr; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ @@ -546,6 +568,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; @@ -655,9 +678,11 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE struct kvm_vcpu_arch_shared shregs; + struct mmio_hpte_cache mmio_cache; unsigned long pgfault_addr; long pgfault_index; unsigned long pgfault_hpte[2]; + struct mmio_hpte_cache_entry *pgfault_cache; struct task_struct *run_task; struct kvm_run *kvm_run; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f6e49640dbe1..2da67bf1f2ec 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); -extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, - struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap); +extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap, + bool *again); extern int h_ipi_redirect; #else static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( @@ -510,6 +511,48 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) #endif /* + * Prototypes for functions called only from assembler code. + * Having prototypes reduces sparse errors. + */ +long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); +long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages); +long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages); +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count); +long kvmppc_h_random(struct kvm_vcpu *vcpu); +void kvmhv_commence_exit(int trap); +long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_subcore_enter_guest(void); +void kvmppc_subcore_exit_guest(void); +long kvmppc_realmode_hmi_handler(void); +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn); +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va); +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long slb_v, unsigned int status, bool data); +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr); +int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); + +/* * Host-side operations we want to set up while running in real * mode in the guest operating on the xics. * Currently only VCPU wakeup is supported. diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e311c25751a4..8d1499334257 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -214,6 +214,11 @@ extern u64 ppc64_rma_size; /* Cleanup function used by kexec */ extern void mmu_cleanup_all(void); extern void radix__mmu_cleanup_all(void); + +/* Functions for creating and updating partition table on POWER9 */ +extern void mmu_partition_table_init(void); +extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1); #endif /* CONFIG_PPC64 */ struct mm_struct; diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h deleted file mode 100644 index 078155fa1189..000000000000 --- a/arch/powerpc/include/asm/mutex.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm - */ -#ifndef _ASM_POWERPC_MUTEX_H -#define _ASM_POWERPC_MUTEX_H - -static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) -{ - int t; - - __asm__ __volatile__ ( -"1: lwarx %0,0,%1 # mutex trylock\n\ - cmpw 0,%0,%2\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %3,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - "\n\ -2:" - : "=&r" (t) - : "r" (&v->counter), "r" (old), "r" (new) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_dec_return_lock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%1 # mutex lock\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_inc_return_unlock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1 # mutex unlock\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>. - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_inc_return_unlock(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0, and return 1 (success), or if the count - * was not 1, then return 0 (failure). - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index e958b7096f19..5c7db0f1a708 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, int64_t opal_pci_poll2(uint64_t id, uint64_t data); int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); +int64_t opal_rm_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); +int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr); int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c07c31b0e89e..dac83fcb9445 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif -#define cpu_relax_lowlatency() cpu_relax() - /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9e1499f98def..04aa1ee8cdb6 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -153,6 +153,8 @@ #define PSSCR_EC 0x00100000 /* Exit Criterion */ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ +#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ @@ -236,6 +238,7 @@ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define SPRN_TIDR 144 /* Thread ID register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -294,6 +297,7 @@ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_LMRR 0x32D /* Load Monitor Region Register */ #define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ +#define SPRN_ASDR 0x330 /* Access segment descriptor register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */ @@ -305,6 +309,7 @@ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ +#define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */ @@ -320,6 +325,7 @@ #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ #define HFSCR_LM __MASK(FSCR_LM_LG) +#define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_TM __MASK(FSCR_TM_LG) @@ -358,6 +364,7 @@ #define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER_SH 11 +#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */ #define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ #define LPCR_LPES 0x0000000c #define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ @@ -378,6 +385,12 @@ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ +/* + * These bits are used in the function kvmppc_set_arch_compat() to specify and + * determine both the compatibility level which we want to emulate and the + * compatibility level which the host is capable of emulating. + */ +#define PCR_ARCH_207 0x8 /* Architecture 2.07 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ @@ -1219,6 +1232,7 @@ #define PVR_ARCH_206 0x0f000003 #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 +#define PVR_ARCH_300 0x0f000005 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index fa37fe93bc02..8c1b913de6d7 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -52,6 +52,14 @@ #define SYNC_IO #endif +#ifdef CONFIG_PPC_PSERIES +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); +} +#endif + static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.slock == 0; diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index 99e1397b71da..609557569f65 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -28,6 +28,7 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry +#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change extern void tlb_flush(struct mmu_gather *tlb); @@ -46,6 +47,21 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, #endif } +static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, + unsigned int page_size) +{ + if (!tlb->page_size) + tlb->page_size = page_size; + else if (tlb->page_size != page_size) { + tlb_flush_mmu(tlb); + /* + * update the page size after flush for the new + * mmu_gather. + */ + tlb->page_size = page_size; + } +} + #ifdef CONFIG_SMP static inline int mm_is_core_local(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h index 343612f8fece..3192d7f0a05b 100644 --- a/arch/powerpc/include/asm/xilinx_intc.h +++ b/arch/powerpc/include/asm/xilinx_intc.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ extern void __init xilinx_intc_init_tree(void); -extern unsigned int xilinx_intc_get_irq(void); +extern unsigned int xintc_get_irq(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_XILINX_INTC_H */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c93cf35ce379..3603b6f51b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -573,6 +573,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +600,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 1672e3398270..44583a52f882 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -97,4 +97,6 @@ #define SO_CNX_ADVICE 53 +#define SCM_TIMESTAMPING_OPT_STATS 54 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99a..195a9fc8f81c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -487,6 +487,7 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -548,6 +549,8 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); + DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid)); + DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); @@ -569,6 +572,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 37c027ca83b2..f3e1f5d29dce 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -174,7 +174,7 @@ __init_FSCR: __init_HFSCR: mfspr r3,SPRN_HFSCR ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB + HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP mtspr SPRN_HFSCR,r3 blr diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a62be72da274..5c31369435f2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -671,8 +671,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); - if (rc) + if (rc) { + pci_unlock_rescan_remove(); return rc; + } /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c4f1d1f7bae0..c1fb255a60d6 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -703,7 +703,7 @@ static struct device_attribute pa6t_attrs[] = { #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ -static void register_cpu_online(unsigned int cpu) +static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; @@ -782,11 +782,12 @@ static void register_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_online(cpu); + return 0; } -#ifdef CONFIG_HOTPLUG_CPU -static void unregister_cpu_online(unsigned int cpu) +static int unregister_cpu_online(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -863,6 +864,8 @@ static void unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); +#endif /* CONFIG_HOTPLUG_CPU */ + return 0; } #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -883,32 +886,6 @@ ssize_t arch_cpu_release(const char *buf, size_t count) } #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ -#endif /* CONFIG_HOTPLUG_CPU */ - -static int sysfs_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned int)(long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - register_cpu_online(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - unregister_cpu_online(cpu); - break; -#endif - } - return NOTIFY_OK; -} - -static struct notifier_block sysfs_cpu_nb = { - .notifier_call = sysfs_cpu_notify, -}; - static DEFINE_MUTEX(cpu_mutex); int cpu_add_dev_attr(struct device_attribute *attr) @@ -1023,12 +1000,10 @@ static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { - int cpu; + int cpu, r; register_nodes(); - cpu_notifier_register_begin(); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -1047,15 +1022,10 @@ static int __init topology_init(void) device_create_file(&c->dev, &dev_attr_physical_id); } - - if (cpu_online(cpu)) - register_cpu_online(cpu); } - - __register_cpu_notifier(&sysfs_cpu_nb); - - cpu_notifier_register_done(); - + r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online", + register_cpu_online, unregister_cpu_online); + WARN_ON(r < 0); #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc3f7d0d7b79..be9751f1cb2a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; EXPORT_SYMBOL(__cputime_clockt_factor); -DEFINE_PER_CPU(unsigned long, cputime_last_delta); -DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; @@ -360,7 +358,8 @@ void vtime_account_system(struct task_struct *tsk) unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta, sys_scaled); + account_system_time(tsk, 0, delta); + tsk->stimescaled += sys_scaled; if (stolen) account_steal_time(stolen); } @@ -393,7 +392,8 @@ void vtime_account_user(struct task_struct *tsk) acct->user_time = 0; acct->user_time_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime, utimescaled); + account_user_time(tsk, utime); + tsk->utimescaled += utimescaled; } #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8295f51c1a5f..7394b770ae1f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -94,8 +94,17 @@ SECTIONS * detected, and will result in a crash at boot due to offsets being * wrong. */ +#ifdef CONFIG_PPC64 + /* + * BLOCK(0) overrides the default output section alignment because + * this needs to start right after .head.text in order for fixed + * section placement to work. + */ + .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) { +#else .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); +#endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 05f09ae82587..b795dd1ac2ef 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) /* 128 (2**7) bytes in each HPTEG */ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; + atomic64_set(&kvm->arch.mmio_update, 0); + /* Allocate reverse map array */ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); if (!rev) { @@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, +static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { @@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_slb *slbe; unsigned long slb_v; unsigned long pp, key; - unsigned long v, gr; + unsigned long v, orig_v, gr; __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -ENOENT; } hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); gr = kvm->arch.revmap[index].guest_rpte; - unlock_hpte(hptep, v); + unlock_hpte(hptep, orig_v); preempt_enable(); gpte->eaddr = eaddr; @@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, { struct kvm *kvm = vcpu->kvm; unsigned long hpte[3], r; + unsigned long hnow_v, hnow_r; __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; @@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int writing, write_ok; struct vm_area_struct *vma; unsigned long rcbits; + long mmio_update; /* * Real-mode code has already searched the HPT and found the @@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; + + if (vcpu->arch.pgfault_cache) { + mmio_update = atomic64_read(&kvm->arch.mmio_update); + if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) { + r = vcpu->arch.pgfault_cache->rpte; + psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r); + gpa_base = r & HPTE_R_RPN & ~(psize - 1); + gfn_base = gpa_base >> PAGE_SHIFT; + gpa = gpa_base | (ea & (psize - 1)); + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + dsisr & DSISR_ISSTORE); + } + } index = vcpu->arch.pgfault_index; hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; @@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unlock_hpte(hptep, hpte[0]); preempt_enable(); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]); + hpte[1] = hpte_new_to_old_r(hpte[1]); + } if (hpte[0] != vcpu->arch.pgfault_hpte[0] || hpte[1] != vcpu->arch.pgfault_hpte[1]) return RESUME_GUEST; @@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (psize < PAGE_SIZE) psize = PAGE_SIZE; - r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); + r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | + ((pfn << PAGE_SHIFT) & ~(psize - 1)); if (hpte_is_writable(r) && !write_ok) r = hpte_make_readonly(r); ret = RESUME_GUEST; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || - be64_to_cpu(hptep[1]) != hpte[1] || - rev->guest_rpte != hpte[2]) + hnow_v = be64_to_cpu(hptep[0]); + hnow_r = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hnow_v = hpte_new_to_old_v(hnow_v, hnow_r); + hnow_r = hpte_new_to_old_r(hnow_r); + } + if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + r = hpte_old_to_new_r(hpte[0], r); + hpte[0] = hpte_old_to_new_v(hpte[0]); + } hptep[1] = cpu_to_be64(r); eieio(); __unlock_hpte(hptep, hpte[0]); @@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, hpte_rpn(ptel, psize) == gfn) { hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; @@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { - unsigned long v, r; + unsigned long v, r, hr; unsigned long rcbits_unset; int ok = 1; int valid, dirty; @@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = be64_to_cpu(hptp[0]); + hr = be64_to_cpu(hptp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, hr); + hr = hpte_new_to_old_r(hr); + } /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { - revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + if (valid && (rcbits_unset & hr)) { + revp->guest_rpte |= (hr & (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, return ret; } -ssize_t debugfs_htab_write(struct file *file, const char __user *buf, +static ssize_t debugfs_htab_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return -EACCES; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index d461c440889a..e4c4ea973e57 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,7 +39,6 @@ #include <asm/udbg.h> #include <asm/iommu.h> #include <asm/tce.h> -#include <asm/iommu.h> #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471be32b..8dcbe37a4dac 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -54,6 +54,9 @@ #include <asm/dbell.h> #include <asm/hmi.h> #include <asm/pnv-pci.h> +#include <asm/mmu.h> +#include <asm/opal.h> +#include <asm/xics.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> @@ -62,6 +65,7 @@ #include <linux/irqbypass.h> #include <linux/module.h> #include <linux/compiler.h> +#include <linux/of.h> #include "book3s.h" @@ -104,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif -/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ -static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); - -/* Factor by which the vcore halt poll interval is grown, default is to double - */ -static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); - -/* Factor by which the vcore halt poll interval is shrunk, default is to reset - */ -static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); - static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -146,12 +133,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, static bool kvmppc_ipi_thread(int cpu) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + + /* On POWER9 we can use msgsnd to IPI any cpu */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + smp_mb(); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return true; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { preempt_disable(); if (cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); smp_mb(); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); @@ -162,8 +158,12 @@ static bool kvmppc_ipi_thread(int cpu) } #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) - if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) { - xics_wake_cpu(cpu); + if (cpu >= 0 && cpu < nr_cpu_ids) { + if (paca[cpu].kvm_hstate.xics_phys) { + xics_wake_cpu(cpu); + return true; + } + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); return true; } #endif @@ -299,41 +299,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_300 (PCR_ARCH_207 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { - unsigned long pcr = 0; + unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; + /* We can (emulate) our own architecture version and anything older */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + host_pcr_bit = PCR_ARCH_300; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + host_pcr_bit = PCR_ARCH_207; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + host_pcr_bit = PCR_ARCH_206; + else + host_pcr_bit = PCR_ARCH_205; + + /* Determine lowest PCR bit needed to run guest in given PVR level */ + guest_pcr_bit = host_pcr_bit; if (arch_compat) { switch (arch_compat) { case PVR_ARCH_205: - /* - * If an arch bit is set in PCR, all the defined - * higher-order arch bits also have to be set. - */ - pcr = PCR_ARCH_206 | PCR_ARCH_205; + guest_pcr_bit = PCR_ARCH_205; break; case PVR_ARCH_206: case PVR_ARCH_206p: - pcr = PCR_ARCH_206; + guest_pcr_bit = PCR_ARCH_206; break; case PVR_ARCH_207: + guest_pcr_bit = PCR_ARCH_207; + break; + case PVR_ARCH_300: + guest_pcr_bit = PCR_ARCH_300; break; default: return -EINVAL; } - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { - /* POWER7 can't emulate POWER8 */ - if (!(pcr & PCR_ARCH_206)) - return -EINVAL; - pcr &= ~PCR_ARCH_206; - } } + /* Check requested PCR bits don't exceed our capabilities */ + if (guest_pcr_bit > host_pcr_bit) + return -EINVAL; + spin_lock(&vc->lock); vc->arch_compat = arch_compat; - vc->pcr = pcr; + /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */ + vc->pcr = host_pcr_bit - guest_pcr_bit; spin_unlock(&vc->lock); return 0; @@ -945,6 +958,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_H_DOORBELL: + case BOOK3S_INTERRUPT_H_VIRT: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -1229,6 +1243,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: *val = get_reg_val(id, vcpu->arch.wort); break; + case KVM_REG_PPC_TIDR: + *val = get_reg_val(id, vcpu->arch.tid); + break; + case KVM_REG_PPC_PSSCR: + *val = get_reg_val(id, vcpu->arch.psscr); + break; case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -1288,6 +1308,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1427,6 +1450,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: vcpu->arch.wort = set_reg_val(id, *val); break; + case KVM_REG_PPC_TIDR: + vcpu->arch.tid = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PSSCR: + vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS; + break; case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; @@ -1498,6 +1527,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; @@ -1540,6 +1572,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +/* + * On POWER9, threads are independent and can be in different partitions. + * Therefore we consider each thread to be a subcore. + * There is a restriction that all threads have to be in the same + * MMU mode (radix or HPT), unfortunately, but since we only support + * HPT guests on a HPT host so far, that isn't an impediment yet. + */ +static int threads_per_vcore(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return 1; + return threads_per_subcore; +} + static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) { struct kvmppc_vcore *vcore; @@ -1554,7 +1600,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; + vcore->first_vcpuid = core * threads_per_vcore(); vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); @@ -1717,7 +1763,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, int core; struct kvmppc_vcore *vcore; - core = id / threads_per_subcore; + core = id / threads_per_vcore(); if (core >= KVM_MAX_VCORES) goto out; @@ -1935,7 +1981,10 @@ static void kvmppc_wait_for_nap(void) { int cpu = smp_processor_id(); int i, loops; + int n_threads = threads_per_vcore(); + if (n_threads <= 1) + return; for (loops = 0; loops < 1000000; ++loops) { /* * Check if all threads are finished. @@ -1943,17 +1992,17 @@ static void kvmppc_wait_for_nap(void) * and the thread clears it when finished, so we look * for any threads that still have a non-NULL vcore ptr. */ - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) break; - if (i == threads_per_subcore) { + if (i == n_threads) { HMT_medium(); return; } HMT_low(); } HMT_medium(); - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -2019,7 +2068,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); - if (vc->num_threads < threads_per_subcore) { + if (vc->num_threads < threads_per_vcore()) { spin_lock(&lp->lock); list_add_tail(&vc->preempt_list, &lp->list); spin_unlock(&lp->lock); @@ -2123,8 +2172,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) cip->subcore_threads[sub] = vc->num_threads; cip->subcore_vm[sub] = vc->kvm; init_master_vcore(vc); - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[sub]); + list_move_tail(&vc->preempt_list, &cip->vcs[sub]); return true; } @@ -2254,12 +2302,12 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) * enter the guest. Only do this if it is the primary thread of the * core (not if a subcore) that is entering the guest. */ -static inline void kvmppc_clear_host_core(int cpu) +static inline int kvmppc_clear_host_core(unsigned int cpu) { int core; if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) - return; + return 0; /* * Memory barrier can be omitted here as we will do a smp_wmb() * later in kvmppc_start_thread and we need ensure that state is @@ -2267,6 +2315,7 @@ static inline void kvmppc_clear_host_core(int cpu) */ core = cpu >> threads_shift; kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0; + return 0; } /* @@ -2274,12 +2323,12 @@ static inline void kvmppc_clear_host_core(int cpu) * Only need to do this if it is the primary thread of the core that is * exiting. */ -static inline void kvmppc_set_host_core(int cpu) +static inline int kvmppc_set_host_core(unsigned int cpu) { int core; if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu)) - return; + return 0; /* * Memory barrier can be omitted here because we do a spin_unlock @@ -2287,6 +2336,7 @@ static inline void kvmppc_set_host_core(int cpu) */ core = cpu >> threads_shift; kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1; + return 0; } /* @@ -2307,6 +2357,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) unsigned long cmd_bit, stat_bit; int pcpu, thr; int target_threads; + int controlled_threads; /* * Remove from the list any threads that have a signal pending @@ -2325,11 +2376,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) vc->preempt_tb = TB_NIL; /* + * Number of threads that we will be controlling: the same as + * the number of threads per subcore, except on POWER9, + * where it's 1 because the threads are (mostly) independent. + */ + controlled_threads = threads_per_vcore(); + + /* * Make sure we are running on primary threads, and that secondary * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. */ - if ((threads_per_core > 1) && + if ((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; @@ -2345,7 +2403,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ init_core_info(&core_info, vc); pcpu = smp_processor_id(); - target_threads = threads_per_subcore; + target_threads = controlled_threads; if (target_smt_mode && target_smt_mode < target_threads) target_threads = target_smt_mode; if (vc->num_threads < target_threads) @@ -2381,7 +2439,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) smp_wmb(); } pcpu = smp_processor_id(); - for (thr = 0; thr < threads_per_subcore; ++thr) + for (thr = 0; thr < controlled_threads; ++thr) paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; /* Initiate micro-threading (split-core) if required */ @@ -2491,7 +2549,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } /* Let secondaries go back to the offline loop */ - for (i = 0; i < threads_per_subcore; ++i) { + for (i = 0; i < controlled_threads; ++i) { kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); @@ -2543,9 +2601,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns = 10000; else vc->halt_poll_ns *= halt_poll_ns_grow; - - if (vc->halt_poll_ns > halt_poll_max_ns) - vc->halt_poll_ns = halt_poll_max_ns; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) @@ -2556,7 +2611,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns /= halt_poll_ns_shrink; } -/* Check to see if any of the runnable vcpus on the vcore have pending +/* + * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded */ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) @@ -2655,16 +2711,18 @@ out: } /* Adjust poll time */ - if (halt_poll_max_ns) { + if (halt_poll_ns) { if (block_ns <= vc->halt_poll_ns) ; /* We slept and blocked for longer than the max halt time */ - else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + else if (vc->halt_poll_ns && block_ns > halt_poll_ns) shrink_halt_poll_ns(vc); /* We slept and our poll time is too small */ - else if (vc->halt_poll_ns < halt_poll_max_ns && - block_ns < halt_poll_max_ns) + else if (vc->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) grow_halt_poll_ns(vc); + if (vc->halt_poll_ns > halt_poll_ns) + vc->halt_poll_ns = halt_poll_ns; } else vc->halt_poll_ns = 0; @@ -2971,6 +3029,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + /* + * If we are making a new memslot, it might make + * some address that was previously cached as emulated + * MMIO be no longer emulated MMIO, so invalidate + * all the caches of emulated MMIO translations. + */ + if (npages) + atomic64_inc(&kvm->arch.mmio_update); + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. @@ -3015,6 +3082,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } +static void kvmppc_setup_partition_table(struct kvm *kvm) +{ + unsigned long dw0, dw1; + + /* PS field - page size for VRMA */ + dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | + ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); + /* HTABSIZE and HTABORG fields */ + dw0 |= kvm->arch.sdr1; + + /* Second dword has GR=0; other fields are unused since UPRT=0 */ + dw1 = 0; + + mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); +} + static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; @@ -3066,17 +3149,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize == 0x1000000)) goto out_srcu; - /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Create HPTEs in the hash page table for the VRMA */ kvmppc_map_vrma(vcpu, memslot, porder); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + /* Update VRMASD field in the LPCR */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */ smp_wmb(); @@ -3094,36 +3180,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_XICS -static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - unsigned long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - kvmppc_set_host_core(cpu); - break; - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - kvmppc_clear_host_core(cpu); - break; -#endif - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block kvmppc_cpu_notifier = { - .notifier_call = kvmppc_cpu_notify, -}; - /* * Allocate a per-core structure for managing state about which cores are * running in the host versus the guest and for exchanging data between @@ -3185,15 +3241,17 @@ void kvmppc_alloc_host_rm_ops(void) return; } - register_cpu_notifier(&kvmppc_cpu_notifier); - + cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE, + "ppc/kvm_book3s:prepare", + kvmppc_set_host_core, + kvmppc_clear_host_core); put_online_cpus(); } void kvmppc_free_host_rm_ops(void) { if (kvmppc_host_rm_ops_hv) { - unregister_cpu_notifier(&kvmppc_cpu_notifier); + cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE); kfree(kvmppc_host_rm_ops_hv->rm_core); kfree(kvmppc_host_rm_ops_hv); kvmppc_host_rm_ops_hv = NULL; @@ -3219,14 +3277,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, * make sure we flush on each core before running the new VM. + * On POWER9, the tlbie in mmu_partition_table_set_entry() + * does this flush for us. */ - cpumask_setall(&kvm->arch.need_tlb_flush); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + cpumask_setall(&kvm->arch.need_tlb_flush); /* Start out with the default set of hcalls enabled */ memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); /* Init LPCR for virtual RMA mode */ kvm->arch.host_lpid = mfspr(SPRN_LPID); @@ -3239,9 +3301,29 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* On POWER8 turn on online bit to enable PURR/SPURR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr |= LPCR_ONL; + /* + * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) + * Set HVICE bit to enable hypervisor virtualization interrupts. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + lpcr &= ~LPCR_VPM0; + lpcr |= LPCR_HVICE; + } + kvm->arch.lpcr = lpcr; /* + * Work out how many sets the TLB has, for the use of + * the TLB invalidation loop in book3s_hv_rmhandlers.S. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ + else + kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */ + + /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. */ @@ -3305,9 +3387,9 @@ static int kvmppc_core_check_processor_compat_hv(void) !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; /* - * Disable KVM for Power9, untill the required bits merged. + * Disable KVM for Power9 in radix mode. */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) return -EIO; return 0; @@ -3661,6 +3743,23 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; + /* + * We need a way of accessing the XICS interrupt controller, + * either directly, via paca[cpu].kvm_hstate.xics_phys, or + * indirectly, via OPAL. + */ +#ifdef CONFIG_SMP + if (!get_paca()->kvm_hstate.xics_phys) { + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); + if (!np) { + pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); + return -ENODEV; + } + } +#endif + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; @@ -3683,3 +3782,4 @@ module_exit(kvmppc_book3s_exit_hv); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(KVM_MINOR); MODULE_ALIAS("devname:kvm"); + diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 0c84d6bc8356..5bb24be0b346 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -26,6 +26,8 @@ #include <asm/dbell.h> #include <asm/cputhreads.h> #include <asm/io.h> +#include <asm/opal.h> +#include <asm/smp.h> #define KVM_CMA_CHUNK_ORDER 18 @@ -205,12 +207,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val) void kvmhv_rm_send_ipi(int cpu) { unsigned long xics_phys; + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); - /* On POWER8 for IPIs to threads in the same core, use msgsnd */ + /* On POWER9 we can use msgsnd for any destination cpu. */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd. */ if (cpu_has_feature(CPU_FTR_ARCH_207S) && cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(raw_smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); return; @@ -218,7 +226,11 @@ void kvmhv_rm_send_ipi(int cpu) /* Else poke the target with an IPI */ xics_phys = paca[cpu].kvm_hstate.xics_phys; - rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), + IPI_PRIORITY); } /* @@ -329,7 +341,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, * saved a copy of the XIRR in the PACA, it will be picked up by * the host ICP driver. */ -static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { struct kvmppc_passthru_irqmap *pimap; struct kvmppc_irq_map *irq_map; @@ -348,11 +360,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr) /* We're handling this interrupt, generic code doesn't need to */ local_paca->kvm_hstate.saved_xirr = 0; - return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again); } #else -static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { return 1; } @@ -367,14 +379,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) * -1 if there was a guest wakeup IPI (which has now been cleared) * -2 if there is PCI passthrough external interrupt that was handled */ +static long kvmppc_read_one_intr(bool *again); long kvmppc_read_intr(void) { + long ret = 0; + long rc; + bool again; + + do { + again = false; + rc = kvmppc_read_one_intr(&again); + if (rc && (ret == 0 || rc > ret)) + ret = rc; + } while (again); + return ret; +} + +static long kvmppc_read_one_intr(bool *again) +{ unsigned long xics_phys; u32 h_xirr; __be32 xirr; u32 xisr; u8 host_ipi; + int64_t rc; /* see if a host IPI is pending */ host_ipi = local_paca->kvm_hstate.host_ipi; @@ -383,8 +412,14 @@ long kvmppc_read_intr(void) /* Now read the interrupt from the ICP */ xics_phys = local_paca->kvm_hstate.xics_phys; - if (unlikely(!xics_phys)) - return 1; + if (!xics_phys) { + /* Use OPAL to read the XIRR */ + rc = opal_rm_int_get_xirr(&xirr, false); + if (rc < 0) + return 1; + } else { + xirr = _lwzcix(xics_phys + XICS_XIRR); + } /* * Save XIRR for later. Since we get control in reverse endian @@ -392,7 +427,6 @@ long kvmppc_read_intr(void) * host endian. Note that xirr is the value read from the * XIRR register, while h_xirr is the host endian version. */ - xirr = _lwzcix(xics_phys + XICS_XIRR); h_xirr = be32_to_cpu(xirr); local_paca->kvm_hstate.saved_xirr = h_xirr; xisr = h_xirr & 0xffffff; @@ -411,8 +445,16 @@ long kvmppc_read_intr(void) * If it is an IPI, clear the MFRR and EOI it. */ if (xisr == XICS_IPI) { - _stbcix(xics_phys + XICS_MFRR, 0xff); - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stbcix(xics_phys + XICS_MFRR, 0xff); + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); + rc = opal_rm_int_eoi(h_xirr); + /* If rc > 0, there is another interrupt pending */ + *again = rc > 0; + } + /* * Need to ensure side effects of above stores * complete before proceeding. @@ -429,7 +471,11 @@ long kvmppc_read_intr(void) /* We raced with the host, * we need to resend that IPI, bummer */ - _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(hard_smp_processor_id(), + IPI_PRIORITY); /* Let side effects complete */ smp_mb(); return 1; @@ -440,5 +486,5 @@ long kvmppc_read_intr(void) return -1; } - return kvmppc_check_passthru(xisr, xirr); + return kvmppc_check_passthru(xisr, xirr, again); } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 0fa70a9618d7..7ef0993214f3 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -16,6 +16,7 @@ #include <asm/machdep.h> #include <asm/cputhreads.h> #include <asm/hmi.h> +#include <asm/kvm_ppc.h> /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 99b4e9d5dd23..9ef3c4be952f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -264,8 +264,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (pa) pteh |= HPTE_V_VALID; - else + else { pteh |= HPTE_V_ABSENT; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); + } /*If we had host pte mapping then Check WIMG */ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { @@ -351,6 +353,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); unlock_rmap(rmap); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, @@ -361,6 +364,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } + /* Convert to new format on P9 */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + ptel = hpte_old_to_new_r(pteh, ptel); + pteh = hpte_old_to_new_v(pteh); + } hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ @@ -386,6 +394,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) #endif +static inline int is_mmio_hpte(unsigned long v, unsigned long r) +{ + return ((v & HPTE_V_ABSENT) && + (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); +} + static inline int try_lock_tlbie(unsigned int *lock) { unsigned int tmp, old; @@ -409,13 +424,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, { long i; + /* + * We use the POWER9 5-operand versions of tlbie and tlbiel here. + * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores + * the RS field, this is backwards-compatible with P7 and P8. + */ if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile(PPC_TLBIE(%1,%0) : : + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : "r" (rbvalues[i]), "r" (kvm->arch.lpid)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; @@ -423,7 +443,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (0)); asm volatile("ptesync" : : : "memory"); } } @@ -435,18 +456,23 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; - u64 pte; + u64 pte, orig_pte, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); + pte = orig_pte = be64_to_cpu(hpte[0]); + pte_r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + pte = hpte_new_to_old_v(pte, pte_r); + pte_r = hpte_new_to_old_r(pte_r); + } if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || ((flags & H_ANDCOND) && (pte & avpn) != 0)) { - __unlock_hpte(hpte, pte); + __unlock_hpte(hpte, orig_pte); return H_NOT_FOUND; } @@ -454,7 +480,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); + rb = compute_tlbie_rb(v, pte_r, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * The reference (R) and change (C) bits in a HPT @@ -472,6 +498,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); + if (is_mmio_hpte(v, pte_r)) + atomic64_inc(&kvm->arch.mmio_update); + if (v & HPTE_V_ABSENT) v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpret[0] = v; @@ -498,7 +527,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - u64 hp0; + u64 hp0, hp1; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -531,6 +560,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } found = 0; hp0 = be64_to_cpu(hp[0]); + hp1 = be64_to_cpu(hp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ @@ -561,13 +595,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; + if (is_mmio_hpte(hp0, hp1)) + atomic64_inc(&kvm->arch.mmio_update); continue; } /* leave it locked */ hp[0] &= ~cpu_to_be64(HPTE_V_VALID); - tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), - be64_to_cpu(hp[1]), pte_index); + tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +640,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; - u64 pte; + u64 pte_v, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; @@ -613,14 +648,16 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); - if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { - __unlock_hpte(hpte, pte); + v = pte_v = be64_to_cpu(hpte[0]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); + if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { + __unlock_hpte(hpte, pte_v); return H_NOT_FOUND; } - v = pte; + pte_r = be64_to_cpu(hpte[1]); bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -642,22 +679,26 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, * readonly to writable. If it should be writable, we'll * take a trap and let the page fault code sort it out. */ - pte = be64_to_cpu(hpte[1]); - r = (pte & ~mask) | bits; - if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = (pte_r & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) r = hpte_make_readonly(r); /* If the PTE is changing, invalidate it first */ - if (r != pte) { + if (r != pte_r) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } - unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); + if (is_mmio_hpte(v, pte_r)) + atomic64_inc(&kvm->arch.mmio_update); + return H_SUCCESS; } @@ -681,6 +722,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -798,10 +843,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; + u64 hp0, hp1; hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -811,9 +862,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, { unsigned long rb; unsigned char rbyte; + u64 hp0, hp1; - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; @@ -828,6 +885,37 @@ static int slb_base_page_shift[4] = { 20, /* 1M, unsupported */ }; +static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, + unsigned long eaddr, unsigned long slb_v, long mmio_update) +{ + struct mmio_hpte_cache_entry *entry = NULL; + unsigned int pshift; + unsigned int i; + + for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { + entry = &vcpu->arch.mmio_cache.entry[i]; + if (entry->mmio_update == mmio_update) { + pshift = entry->slb_base_pshift; + if ((entry->eaddr >> pshift) == (eaddr >> pshift) && + entry->slb_v == slb_v) + return entry; + } + } + return NULL; +} + +static struct mmio_hpte_cache_entry * + next_mmio_cache_entry(struct kvm_vcpu *vcpu) +{ + unsigned int index = vcpu->arch.mmio_cache.index; + + vcpu->arch.mmio_cache.index++; + if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) + vcpu->arch.mmio_cache.index = 0; + + return &vcpu->arch.mmio_cache.entry[index]; +} + /* When called from virtmode, this func should be protected by * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK * can trigger deadlock issue. @@ -842,7 +930,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long avpn; __be64 *hpte; unsigned long mask, val; - unsigned long v, r; + unsigned long v, r, orig_v; /* Get page shift, work out hash and AVPN etc. */ mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; @@ -877,6 +965,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -885,8 +975,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[i+1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } /* * Check the HPTE again, including base page size @@ -896,7 +990,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); - __unlock_hpte(&hpte[i], v); + __unlock_hpte(&hpte[i], orig_v); } if (val & HPTE_V_SECONDARY) @@ -924,30 +1018,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, { struct kvm *kvm = vcpu->kvm; long int index; - unsigned long v, r, gr; + unsigned long v, r, gr, orig_v; __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; + struct mmio_hpte_cache_entry *cache_entry = NULL; + long mmio_update = 0; /* For protection fault, expect to find a valid HPTE */ valid = HPTE_V_VALID; - if (status & DSISR_NOHPTE) + if (status & DSISR_NOHPTE) { valid |= HPTE_V_ABSENT; - - index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); - if (index < 0) { - if (status & DSISR_NOHPTE) - return status; /* there really was no HPTE */ - return 0; /* for prot fault, HPTE disappeared */ + mmio_update = atomic64_read(&kvm->arch.mmio_update); + cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); } - hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; - r = be64_to_cpu(hpte[1]); - rev = real_vmalloc_addr(&kvm->arch.revmap[index]); - gr = rev->guest_rpte; + if (cache_entry) { + index = cache_entry->pte_index; + v = cache_entry->hpte_v; + r = cache_entry->hpte_r; + gr = cache_entry->rpte; + } else { + index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); + if (index < 0) { + if (status & DSISR_NOHPTE) + return status; /* there really was no HPTE */ + return 0; /* for prot fault, HPTE disappeared */ + } + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } + rev = real_vmalloc_addr(&kvm->arch.revmap[index]); + gr = rev->guest_rpte; - unlock_hpte(hpte, v); + unlock_hpte(hpte, orig_v); + } /* For not found, if the HPTE is valid by now, retry the instruction */ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) @@ -985,12 +1094,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, vcpu->arch.pgfault_index = index; vcpu->arch.pgfault_hpte[0] = v; vcpu->arch.pgfault_hpte[1] = r; + vcpu->arch.pgfault_cache = cache_entry; /* Check the storage key to see if it is possibly emulated MMIO */ - if (data && (vcpu->arch.shregs.msr & MSR_IR) && - (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == - (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) - return -2; /* MMIO emulation - load instr word */ + if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { + if (!cache_entry) { + unsigned int pshift = 12; + unsigned int pshift_index; + + if (slb_v & SLB_VSID_L) { + pshift_index = ((slb_v & SLB_VSID_LP) >> 4); + pshift = slb_base_page_shift[pshift_index]; + } + cache_entry = next_mmio_cache_entry(vcpu); + cache_entry->eaddr = addr; + cache_entry->slb_base_pshift = pshift; + cache_entry->pte_index = index; + cache_entry->hpte_v = v; + cache_entry->hpte_r = r; + cache_entry->rpte = gr; + cache_entry->slb_v = slb_v; + cache_entry->mmio_update = mmio_update; + } + if (data && (vcpu->arch.shregs.msr & MSR_IR)) + return -2; /* MMIO emulation - load instr word */ + } return -1; /* send fault up to host kernel mode */ } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index a0ea63ac2b52..06edc4366639 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -70,7 +70,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) hcpu = hcore << threads_shift; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); - icp_native_cause_ipi_rm(hcpu); + if (paca[hcpu].kvm_hstate.xics_phys) + icp_native_cause_ipi_rm(hcpu); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), + IPI_PRIORITY); } #else static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } @@ -737,7 +741,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) { unsigned long xics_phys; int64_t rc; @@ -751,7 +755,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) /* EOI it */ xics_phys = local_paca->kvm_hstate.xics_phys; - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + rc = opal_rm_int_eoi(be32_to_cpu(xirr)); + *again = rc > 0; + } } static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) @@ -809,9 +818,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) } long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, - u32 xirr, + __be32 xirr, struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap) + struct kvmppc_passthru_irqmap *pimap, + bool *again) { struct kvmppc_xics *xics; struct kvmppc_icp *icp; @@ -825,7 +835,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, + again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c3c1d1bcfc67..9338a818e05c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r0, 0 beq 57f li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 - mfspr r4, SPRN_LPCR - rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) - mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + mfspr r5, SPRN_LPCR + rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) + b kvm_nap_sequence 57: li r0, 0 stbx r0, r3, r4 @@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ +/* Stack frame offsets */ +#define STACK_SLOT_TID (112-16) +#define STACK_SLOT_PSSCR (112-24) + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -581,12 +577,14 @@ kvmppc_hv_entry: ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ cmpwi r6,0 bne 10f - ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) +BEGIN_FTR_SECTION + ld r6,KVM_SDR1(r9) li r0,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r0 ptesync mtspr SPRN_SDR1,r6 /* switch to partition page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -607,12 +605,8 @@ kvmppc_hv_entry: stdcx. r7,0,r6 bne 23b /* Flush the TLB of any entries for this LPID */ - /* use arch 2.07S as a proxy for POWER8 */ -BEGIN_FTR_SECTION - li r6,512 /* POWER8 has 512 sets */ -FTR_SECTION_ELSE - li r6,128 /* POWER7 has 128 sets */ -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) + lwz r6,KVM_TLB_SETS(r9) + li r0,0 /* RS for P9 version of tlbiel */ mtctr r6 li r7,0x800 /* IS field = 0b10 */ ptesync @@ -698,6 +692,14 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, STACK_SLOT_TID(r1) + std r6, STACK_SLOT_PSSCR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) BEGIN_FTR_SECTION ld r5, VCPU_MMCR + 24(r4) ld r6, VCPU_SIER(r4) + mtspr SPRN_MMCR2, r5 + mtspr SPRN_SIER, r6 +BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) ld r9, VCPU_MMCR + 32(r4) - mtspr SPRN_MMCR2, r5 - mtspr SPRN_SIER, r6 mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) - ld r7, VCPU_CSIGR(r4) - ld r8, VCPU_TACR(r4) + lwz r7, VCPU_GUEST_PID(r4) + ld r8, VCPU_WORT(r4) mtspr SPRN_EBBRR, r5 mtspr SPRN_BESCR, r6 - mtspr SPRN_CSIGR, r7 - mtspr SPRN_TACR, r8 + mtspr SPRN_PID, r7 + mtspr SPRN_WORT, r8 +BEGIN_FTR_SECTION + /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) - lwz r7, VCPU_GUEST_PID(r4) - ld r8, VCPU_WORT(r4) + ld r7, VCPU_CSIGR(r4) + ld r8, VCPU_TACR(r4) mtspr SPRN_TCSCR, r5 mtspr SPRN_ACOP, r6 - mtspr SPRN_PID, r7 - mtspr SPRN_WORT, r8 + mtspr SPRN_CSIGR, r7 + mtspr SPRN_TACR, r8 +FTR_SECTION_ELSE + /* POWER9-only registers */ + ld r5, VCPU_TID(r4) + ld r6, VCPU_PSSCR(r4) + oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: /* @@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR mfspr r6, SPRN_BESCR - mfspr r7, SPRN_CSIGR - mfspr r8, SPRN_TACR + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT std r5, VCPU_EBBRR(r9) std r6, VCPU_BESCR(r9) - std r7, VCPU_CSIGR(r9) - std r8, VCPU_TACR(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +BEGIN_FTR_SECTION mfspr r5, SPRN_TCSCR mfspr r6, SPRN_ACOP - mfspr r7, SPRN_PID - mfspr r8, SPRN_WORT + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR std r5, VCPU_TCSCR(r9) std r6, VCPU_ACOP(r9) - stw r7, VCPU_GUEST_PID(r9) - std r8, VCPU_WORT(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) +FTR_SECTION_ELSE + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, VCPU_TID(r9) + rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ + rotldi r6, r6, 60 + std r6, VCPU_PSSCR(r9) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. @@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 - mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 +BEGIN_FTR_SECTION + mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 sldi r0, r0, 31 mtspr SPRN_MMCRS, r0 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ @@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION mfspr r5, SPRN_SIER + std r5, VCPU_SIER(r9) +BEGIN_FTR_SECTION_NESTED(96) mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 mfspr r8, SPRN_MMCRS - std r5, VCPU_SIER(r9) stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) std r8, VCPU_MMCR + 32(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: /* Clear out SLB */ @@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_TID(r1) + ld r6, STACK_SLOT_PSSCR(r1) + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1552,12 +1587,14 @@ kvmhv_switch_to_host: beq 19f /* Primary thread switches back to host partition */ - ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) +BEGIN_FTR_SECTION + ld r6,KVM_HOST_SDR1(r4) li r8,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r8 ptesync - mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_SDR1,r6 /* switch to host page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION ori r5, r5, LPCR_PECEDH rlwimi r5, r3, 0, LPCR_PECEDP END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +kvm_nap_sequence: /* desired LPCR value in r5 */ +BEGIN_FTR_SECTION + /* + * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset) + * enable state loss = 1 (allow SMT mode switch) + * requested level = 0 (just stop dispatching) + */ + lis r3, (PSSCR_EC | PSSCR_ESL)@h + mtspr SPRN_PSSCR, r3 + /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ + li r4, LPCR_PECE_HVEE@higher + sldi r4, r4, 32 + or r5, r5, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync li r0, 0 @@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r0, HSTATE_SCRATCH0(r13) 1: cmpd r0, r0 bne 1b +BEGIN_FTR_SECTION nap +FTR_SECTION_ELSE + PPC_STOP +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) b . 33: mr r4, r3 @@ -2600,11 +2656,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. */ lwz r12, VCPU_TRAP(r9) @@ -2697,11 +2755,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70963c845e96..efd1183a6b16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + + case KVM_CAP_PPC_ALLOC_HTAB: + r = hv_enabled; + break; #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - if (hv_enabled) - r = threads_per_subcore; - else - r = 0; + r = 0; + if (hv_enabled) { + if (cpu_has_feature(CPU_FTR_ARCH_300)) + r = 1; + else + r = threads_per_subcore; + } break; case KVM_CAP_PPC_RMA: r = 0; diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index fb21990c0fb4..ebc6dd449556 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup, __entry->tgid = current->tgid; ), - TP_printk("%s time %lld ns, tgid=%d", + TP_printk("%s time %llu ns, tgid=%d", __entry->waited ? "wait" : "poll", __entry->ns, __entry->tgid) ); diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 42c702b3be1f..6fa450c12d6d 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, */ rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 3bbbea07378c..1a68cb19b0e3 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -87,7 +87,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, subpg_pte = new_pte & ~subpg_prot; rflags = htab_convert_pte_flags(subpg_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { /* @@ -258,7 +258,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, rflags = htab_convert_pte_flags(new_pte); - if (!cpu_has_feature(CPU_FTR_NOEXECUTE) && + if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 83ddc0e171b0..ad9fd5245be2 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", i, hpte_v, hpte_r); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_r = hpte_old_to_new_r(hpte_v, hpte_r); + hpte_v = hpte_old_to_new_v(hpte_v); + } + hptep->r = cpu_to_be64(hpte_r); /* Guarantee the second dword is visible before the valid bit */ eieio(); @@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, vpn, want_v & HPTE_V_AVPN, slot, newpp); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less @@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); /* recheck with locks held */ hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ @@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid, want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, /* Look at the 8 bit LP value */ unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_v = hpte_new_to_old_v(hpte_v, hpte_r); + hpte_r = hpte_new_to_old_r(hpte_r); + } if (!(hpte_v & HPTE_V_LARGE)) { size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; @@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; - else - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; - + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local) want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, + be64_to_cpu(hptep->r)); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 78dabf065ba9..8410b4bb36ed 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -796,37 +796,17 @@ static void update_hid_for_hash(void) static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { - unsigned long ps_field; - unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + mmu_partition_table_init(); /* - * slb llp encoding for the page size used in VPM real mode. - * We can ignore that for lpid 0 + * PS field (VRMA page size) is not used for LPID 0, hence set to 0. + * For now, UPRT is 0 and we have no segment table. */ - ps_field = 0; htab_size = __ilog2(htab_size) - 18; - - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = __va(memblock_alloc_base(patb_size, patb_size, - MEMBLOCK_ALLOC_ANYWHERE)); - - /* Initialize the Partition Table with no entries */ - memset((void *)partition_tb, 0, patb_size); - partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size); - /* - * FIXME!! This should be done via update_partition table - * For now UPRT is 0 for us. - */ - partition_tb->patb1 = 0; + mmu_partition_table_set_entry(0, hash_table | htab_size, 0); pr_info("Partition table %p\n", partition_tb); if (cpu_has_feature(CPU_FTR_POWER9_DD1)) update_hid_for_hash(); - /* - * update partition table control register, - * 64 K size. - */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); - } static void __init htab_initialize(void) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a51c188b81f3..0cb6bd8bfccf 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1085,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr) int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; - int nid, found = 0; + int nid; if (!numa_enabled || (min_common_depth < 0)) return first_online_node; @@ -1101,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr) if (nid < 0 || !node_online(nid)) nid = first_online_node; - if (NODE_DATA(nid)->node_spanned_pages) - return nid; - - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages) { - found = 1; - break; - } - } - - BUG_ON(!found); return nid; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 688b54517655..8d941c692eb3 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -177,23 +177,15 @@ redo: static void __init radix_init_partition_table(void) { - unsigned long rts_field; + unsigned long rts_field, dw0; + mmu_partition_table_init(); rts_field = radix__get_tree_size(); + dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; + mmu_partition_table_set_entry(0, dw0, 0); - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); - partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | - RADIX_PGD_INDEX_SIZE | PATB_HR); pr_info("Initializing Radix MMU\n"); pr_info("Partition table %p\n", partition_tb); - - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - /* - * update partition table control register, - * 64 K size. - */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); } void __init radix_init_native(void) @@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void) radix_init_partition_table(); } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + radix_init_pgtable(); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f5e8d4edb808..8bca7f58afc4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) } } #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +void __init mmu_partition_table_init(void) +{ + unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); + partition_tb = __va(memblock_alloc_base(patb_size, patb_size, + MEMBLOCK_ALLOC_ANYWHERE)); + + /* Initialize the Partition Table with no entries */ + memset((void *)partition_tb, 0, patb_size); + + /* + * update partition table control register, + * 64 K size. + */ + mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); +} + +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1) +{ + partition_tb[lpid].patb0 = cpu_to_be64(dw0); + partition_tb[lpid].patb1 = cpu_to_be64(dw1); + + /* Global flush of TLBs and partition table caches for this lpid */ + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} +EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0fe98a567125..73a5cf18fd84 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -766,7 +766,7 @@ emit_clear: func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_skb_data(func)) + if (bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -775,7 +775,7 @@ emit_clear: PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_skb_data(func)) { + if (bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index e3257f24a8a1..1d7c1b142bf4 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -64,6 +64,7 @@ config XILINX_VIRTEX_GENERIC_BOARD default n select XILINX_VIRTEX_II_PRO select XILINX_VIRTEX_4_FX + select XILINX_INTC help This option enables generic support for Xilinx Virtex based boards. diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index 91a08ea758a8..e3d5e095846b 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -48,7 +48,7 @@ define_machine(virtex) { .probe = virtex_probe, .setup_arch = xilinx_pci_init, .init_IRQ = xilinx_intc_init_tree, - .get_irq = xilinx_intc_get_irq, + .get_irq = xintc_get_irq, .restart = ppc4xx_reset_system, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 48fc18041ff6..25b8d641ff9f 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -241,6 +241,7 @@ config XILINX_VIRTEX440_GENERIC_BOARD depends on 44x default n select XILINX_VIRTEX_5_FXT + select XILINX_INTC help This option enables generic support for Xilinx Virtex based boards that use a 440 based processor in the Virtex 5 FXT FPGA architecture. diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index a7e08026097a..3eb13ed926ee 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -54,7 +54,7 @@ define_machine(virtex) { .probe = virtex_probe, .setup_arch = xilinx_pci_init, .init_IRQ = xilinx_intc_init_tree, - .get_irq = xilinx_intc_get_irq, + .get_irq = xintc_get_irq, .calibrate_decr = generic_calibrate_decr, .restart = ppc4xx_reset_system, }; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 44d2d842cee7..3aa40f1b20f5 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 6c9a65b52e63..b3b8930ac52f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind); EXPORT_SYMBOL_GPL(opal_leds_set_ind); /* Export this symbol for PowerNV Operator Panel class driver */ EXPORT_SYMBOL_GPL(opal_write_oppanel_async); +/* Export this for KVM */ +EXPORT_SYMBOL_GPL(opal_int_set_mfrr); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index cb3c50328de8..cc2b281a3766 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d..f2c98f6c1c9c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 0f52d7955796..4a86dcff3fcd 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -29,194 +29,7 @@ #include <asm/processor.h> #include <asm/i8259.h> #include <asm/irq.h> - -/* - * INTC Registers - */ -#define XINTC_ISR 0 /* Interrupt Status */ -#define XINTC_IPR 4 /* Interrupt Pending */ -#define XINTC_IER 8 /* Interrupt Enable */ -#define XINTC_IAR 12 /* Interrupt Acknowledge */ -#define XINTC_SIE 16 /* Set Interrupt Enable bits */ -#define XINTC_CIE 20 /* Clear Interrupt Enable bits */ -#define XINTC_IVR 24 /* Interrupt Vector */ -#define XINTC_MER 28 /* Master Enable */ - -static struct irq_domain *master_irqhost; - -#define XILINX_INTC_MAXIRQS (32) - -/* The following table allows the interrupt type, edge or level, - * to be cached after being read from the device tree until the interrupt - * is mapped - */ -static int xilinx_intc_typetable[XILINX_INTC_MAXIRQS]; - -/* Map the interrupt type from the device tree to the interrupt types - * used by the interrupt subsystem - */ -static unsigned char xilinx_intc_map_senses[] = { - IRQ_TYPE_EDGE_RISING, - IRQ_TYPE_EDGE_FALLING, - IRQ_TYPE_LEVEL_HIGH, - IRQ_TYPE_LEVEL_LOW, -}; - -/* - * The interrupt controller is setup such that it doesn't work well with - * the level interrupt handler in the kernel because the handler acks the - * interrupt before calling the application interrupt handler. To deal with - * that, we use 2 different irq chips so that different functions can be - * used for level and edge type interrupts. - * - * IRQ Chip common (across level and edge) operations - */ -static void xilinx_intc_mask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("mask: %d\n", irq); - out_be32(regs + XINTC_CIE, 1 << irq); -} - -static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type) -{ - return 0; -} - -/* - * IRQ Chip level operations - */ -static void xilinx_intc_level_unmask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("unmask: %d\n", irq); - out_be32(regs + XINTC_SIE, 1 << irq); - - /* ack level irqs because they can't be acked during - * ack function since the handle_level_irq function - * acks the irq before calling the inerrupt handler - */ - out_be32(regs + XINTC_IAR, 1 << irq); -} - -static struct irq_chip xilinx_intc_level_irqchip = { - .name = "Xilinx Level INTC", - .irq_mask = xilinx_intc_mask, - .irq_mask_ack = xilinx_intc_mask, - .irq_unmask = xilinx_intc_level_unmask, - .irq_set_type = xilinx_intc_set_type, -}; - -/* - * IRQ Chip edge operations - */ -static void xilinx_intc_edge_unmask(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void *regs = irq_data_get_irq_chip_data(d); - pr_debug("unmask: %d\n", irq); - out_be32(regs + XINTC_SIE, 1 << irq); -} - -static void xilinx_intc_edge_ack(struct irq_data *d) -{ - int irq = irqd_to_hwirq(d); - void * regs = irq_data_get_irq_chip_data(d); - pr_debug("ack: %d\n", irq); - out_be32(regs + XINTC_IAR, 1 << irq); -} - -static struct irq_chip xilinx_intc_edge_irqchip = { - .name = "Xilinx Edge INTC", - .irq_mask = xilinx_intc_mask, - .irq_unmask = xilinx_intc_edge_unmask, - .irq_ack = xilinx_intc_edge_ack, - .irq_set_type = xilinx_intc_set_type, -}; - -/* - * IRQ Host operations - */ - -/** - * xilinx_intc_xlate - translate virq# from device tree interrupts property - */ -static int xilinx_intc_xlate(struct irq_domain *h, struct device_node *ct, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, - unsigned int *out_flags) -{ - if ((intsize < 2) || (intspec[0] >= XILINX_INTC_MAXIRQS)) - return -EINVAL; - - /* keep a copy of the interrupt type til the interrupt is mapped - */ - xilinx_intc_typetable[intspec[0]] = xilinx_intc_map_senses[intspec[1]]; - - /* Xilinx uses 2 interrupt entries, the 1st being the h/w - * interrupt number, the 2nd being the interrupt type, edge or level - */ - *out_hwirq = intspec[0]; - *out_flags = xilinx_intc_map_senses[intspec[1]]; - - return 0; -} -static int xilinx_intc_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t irq) -{ - irq_set_chip_data(virq, h->host_data); - - if (xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_HIGH || - xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_LOW) { - irq_set_chip_and_handler(virq, &xilinx_intc_level_irqchip, - handle_level_irq); - } else { - irq_set_chip_and_handler(virq, &xilinx_intc_edge_irqchip, - handle_edge_irq); - } - return 0; -} - -static const struct irq_domain_ops xilinx_intc_ops = { - .map = xilinx_intc_map, - .xlate = xilinx_intc_xlate, -}; - -struct irq_domain * __init -xilinx_intc_init(struct device_node *np) -{ - struct irq_domain * irq; - void * regs; - - /* Find and map the intc registers */ - regs = of_iomap(np, 0); - if (!regs) { - pr_err("xilinx_intc: could not map registers\n"); - return NULL; - } - - /* Setup interrupt controller */ - out_be32(regs + XINTC_IER, 0); /* disable all irqs */ - out_be32(regs + XINTC_IAR, ~(u32) 0); /* Acknowledge pending irqs */ - out_be32(regs + XINTC_MER, 0x3UL); /* Turn on the Master Enable. */ - - /* Allocate and initialize an irq_domain structure. */ - irq = irq_domain_add_linear(np, XILINX_INTC_MAXIRQS, &xilinx_intc_ops, - regs); - if (!irq) - panic(__FILE__ ": Cannot allocate IRQ host\n"); - - return irq; -} - -int xilinx_intc_get_irq(void) -{ - void * regs = master_irqhost->host_data; - pr_debug("get_irq:\n"); - return irq_linear_revmap(master_irqhost, in_be32(regs + XINTC_IVR)); -} +#include <linux/irqchip.h> #if defined(CONFIG_PPC_I8259) /* @@ -265,31 +78,11 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static const struct of_device_id xilinx_intc_match[] __initconst = { - { .compatible = "xlnx,opb-intc-1.00.c", }, - { .compatible = "xlnx,xps-intc-1.00.a", }, - {} -}; - /* * Initialize master Xilinx interrupt controller */ void __init xilinx_intc_init_tree(void) { - struct device_node *np; - - /* find top level interrupt controller */ - for_each_matching_node(np, xilinx_intc_match) { - if (!of_get_property(np, "interrupts", NULL)) - break; - } - BUG_ON(!np); - - master_irqhost = xilinx_intc_init(np); - BUG_ON(!master_irqhost); - - irq_set_default_host(master_irqhost); - of_node_put(np); - + irqchip_init(); xilinx_i8259_setup_cascade(); } |