diff options
63 files changed, 2343 insertions, 1377 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..94802149255a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -93,8 +93,10 @@ config X86 select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP + select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC select GENERIC_IRQ_MIGRATION if SMP select GENERIC_IRQ_PROBE + select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP select GENERIC_SMP_IDLE_THREAD diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 5f01671c68f2..a9e57f08bfa6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,6 +53,15 @@ extern int local_apic_timer_c2_ok; extern int disable_apic; extern unsigned int lapic_timer_frequency; +extern enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id { + APIC_PIC, + APIC_VIRTUAL_WIRE, + APIC_VIRTUAL_WIRE_NO_CONFIG, + APIC_SYMMETRIC_IO, + APIC_SYMMETRIC_IO_NO_ROUTING +}; + #ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); #else /* CONFIG_SMP */ @@ -127,14 +136,13 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); -extern void init_bsp_APIC(void); +extern void apic_intr_mode_init(void); extern void setup_local_APIC(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); -extern int APIC_init_uniprocessor(void); #ifdef CONFIG_X86_64 static inline int apic_force_enable(unsigned long addr) @@ -145,7 +153,7 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern int apic_bsp_setup(bool upmode); +extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -161,6 +169,10 @@ static inline int apic_is_clustered_box(void) #endif extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); +extern void lapic_assign_system_vectors(void); +extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_online(void); +extern void lapic_offline(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -170,6 +182,9 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } +static inline void apic_intr_mode_init(void) { } +static inline void lapic_assign_system_vectors(void) { } +static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC @@ -265,73 +280,63 @@ struct irq_data; * James Cleverdon. */ struct apic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(int apicid); - int (*apic_id_registered)(void); - - u32 irq_delivery_mode; - u32 irq_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int disable_esr; - - int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - void (*init_apic_ldr)(void); - - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); - - unsigned int (*get_apic_id)(unsigned long x); - /* Can't be NULL on 64-bit */ - unsigned long (*set_apic_id)(unsigned int id); - - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); - - /* ipi */ - void (*send_IPI)(int cpu, int vector); - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); + /* Hotpath functions first */ + void (*eoi_write)(u32 reg, u32 v); + void (*native_eoi_write)(u32 reg, u32 v); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + + /* IPI related functions */ + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); + + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* dest_logical is used by the IPI functions */ + u32 dest_logical; + u32 disable_esr; + u32 irq_delivery_mode; + u32 irq_dest_mode; + + /* Functions and data related to vector allocation */ + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); + u32 (*calc_dest_apicid)(unsigned int cpu); + + /* ICR related functions */ + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + + /* Probe, setup and smpboot functions */ + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); + int (*apic_id_registered)(void); + + bool (*check_apicid_used)(physid_mask_t *map, int apicid); + void (*init_apic_ldr)(void); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + void (*setup_apic_routing)(void); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + int (*check_phys_apicid_present)(int phys_apicid); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + u32 (*get_apic_id)(unsigned long x); + u32 (*set_apic_id)(unsigned int id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - void (*inquire_remote_apic)(int apicid); - - /* apic ops */ - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - /* - * ->eoi_write() has the same signature as ->write(). - * - * Drivers can support both ->eoi_write() and ->write() by passing the same - * callback value. Kernel can override ->eoi_write() and fall back - * on write for EOI. - */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); + void (*inquire_remote_apic)(int apicid); #ifdef CONFIG_X86_32 /* @@ -346,6 +351,7 @@ struct apic { */ int (*x86_32_early_logical_apicid)(int cpu); #endif + char *name; }; /* @@ -380,6 +386,7 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; */ #ifdef CONFIG_SMP extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -463,84 +470,33 @@ static inline unsigned default_get_apic_id(unsigned long x) extern void apic_send_IPI_self(int vector); DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); #endif extern void generic_bigsmp_probe(void); - #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -static inline const struct cpumask *online_target_cpus(void) -{ - return cpu_online_mask; -} - DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); +extern struct apic apic_noop; static inline unsigned int read_apic_id(void) { - unsigned int reg; - - reg = apic_read(APIC_ID); + unsigned int reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -static inline int default_apic_id_valid(int apicid) -{ - return (apicid < 255); -} - +extern int default_apic_id_valid(int apicid); extern int default_acpi_madt_oem_check(char *, char *); - extern void default_setup_apic_routing(void); -extern struct apic apic_noop; - -#ifdef CONFIG_X86_32 - -static inline int noop_x86_32_early_logical_apicid(int cpu) -{ - return BAD_APICID; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -extern void default_init_apic_ldr(void); - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif +extern u32 apic_default_calc_apicid(unsigned int cpu); +extern u32 apic_flat_calc_apicid(unsigned int cpu); extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, @@ -548,71 +504,17 @@ extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, struct irq_data *irqdata, unsigned int *apicid); - -static inline void -flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -static inline void -default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_copy(retmask, cpumask_of(cpu)); -} - -static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int phys_apicid) -{ - return physid_isset(phys_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#else +extern bool default_check_apicid_used(physid_mask_t *map, int apicid); +extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask); +extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); -#endif #endif /* CONFIG_X86_LOCAL_APIC */ + extern void irq_enter(void); extern void irq_exit(void); diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 0a3e808b9123..4011cb03ef08 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -393,7 +393,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); -extern unsigned long used_vectors[]; +extern unsigned long system_vectors[]; #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8ec99a55e6b9..b80e46733909 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,6 +16,8 @@ #include <asm/irq_vectors.h> +#define IRQ_MATRIX_BITS NR_VECTORS + #ifndef __ASSEMBLY__ #include <linux/percpu.h> @@ -123,15 +125,13 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; - u8 vector; - u8 old_vector; + unsigned int vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); -extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5c27e146a166..a8834dd546cd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,7 +193,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); -extern void setup_ioapic_dest(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -233,7 +232,6 @@ static inline void io_apic_init_mappings(void) { } static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } -static inline void setup_ioapic_dest(void) { } #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d8632f8fa17d..2395bb794c7b 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -26,11 +26,7 @@ extern void irq_ctx_init(int cpu); struct irq_desc; -#ifdef CONFIG_HOTPLUG_CPU -#include <linux/cpumask.h> -extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -#endif #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index c20ffca8fef1..67421f649cfa 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,12 +102,8 @@ #define POSTED_INTR_NESTED_VECTOR 0xf0 #endif -/* - * Local APIC timer IRQ vector is on a different priority level, - * to work around the 'lost local interrupt if more than 2 IRQ - * sources per level' errata. - */ -#define LOCAL_TIMER_VECTOR 0xef +#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xee #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 423e112c1e8f..f695cc6b8e1f 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -9,6 +9,7 @@ enum { /* Allocate contiguous CPU vectors */ X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, + X86_IRQ_ALLOC_LEGACY = 0x2, }; extern struct irq_domain *x86_vector_domain; @@ -42,8 +43,8 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg); extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); -extern void mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data); +extern int mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c73e493adf07..9d7d856b2d89 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC - return __default_cpu_present_to_apicid(mps_cpu); + return default_cpu_present_to_apicid(mps_cpu); #else WARN_ON_ONCE(1); return BAD_APICID; diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 8eb139ed1a03..84b9ec0c1bc0 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -138,6 +138,254 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); DEFINE_IRQ_VECTOR_EVENT(thermal_apic); #endif +TRACE_EVENT(vector_config, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int apicdest), + + TP_ARGS(irq, vector, cpu, apicdest), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, apicdest ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->apicdest = apicdest; + ), + + TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", + __entry->irq, __entry->vector, __entry->cpu, + __entry->apicdest) +); + +DECLARE_EVENT_CLASS(vector_mod, + + TP_PROTO(unsigned int irq, unsigned int vector, + unsigned int cpu, unsigned int prev_vector, + unsigned int prev_cpu), + + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( unsigned int, cpu ) + __field( unsigned int, prev_vector ) + __field( unsigned int, prev_cpu ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = vector; + __entry->cpu = cpu; + __entry->prev_vector = prev_vector; + __entry->prev_cpu = prev_cpu; + + ), + + TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", + __entry->irq, __entry->vector, __entry->cpu, + __entry->prev_vector, __entry->prev_cpu) +); + +#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ +DEFINE_EVENT_FN(vector_mod, name, \ + TP_PROTO(unsigned int irq, unsigned int vector, \ + unsigned int cpu, unsigned int prev_vector, \ + unsigned int prev_cpu), \ + TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); +DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); + +DECLARE_EVENT_CLASS(vector_reserve, + + TP_PROTO(unsigned int irq, int ret), + + TP_ARGS(irq, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) +); + +#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ +DEFINE_EVENT_FN(vector_reserve, name, \ + TP_PROTO(unsigned int irq, int ret), \ + TP_ARGS(irq, ret), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); +DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); + +TRACE_EVENT(vector_alloc, + + TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, + int ret), + + TP_ARGS(irq, vector, ret, reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( bool, reserved ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->reserved = reserved; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u reserved=%d ret=%d", + __entry->irq, __entry->vector, + __entry->reserved, __entry->ret) +); + +TRACE_EVENT(vector_alloc_managed, + + TP_PROTO(unsigned int irq, unsigned int vector, + int ret), + + TP_ARGS(irq, vector, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, vector ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->vector = ret < 0 ? 0 : vector; + __entry->ret = ret > 0 ? 0 : ret; + ), + + TP_printk("irq=%u vector=%u ret=%d", + __entry->irq, __entry->vector, __entry->ret) +); + +DECLARE_EVENT_CLASS(vector_activate, + + TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, + bool early), + + TP_ARGS(irq, is_managed, can_reserve, early), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, can_reserve ) + __field( bool, early ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->can_reserve = can_reserve; + __entry->early = early; + ), + + TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + __entry->irq, __entry->is_managed, __entry->can_reserve, + __entry->early) +); + +#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ +DEFINE_EVENT_FN(vector_activate, name, \ + TP_PROTO(unsigned int irq, bool is_managed, \ + bool can_reserve, bool early), \ + TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); +DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); + +TRACE_EVENT(vector_teardown, + + TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), + + TP_ARGS(irq, is_managed, has_reserved), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_managed ) + __field( bool, has_reserved ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_managed = is_managed; + __entry->has_reserved = has_reserved; + ), + + TP_printk("irq=%u is_managed=%d has_reserved=%d", + __entry->irq, __entry->is_managed, __entry->has_reserved) +); + +TRACE_EVENT(vector_setup, + + TP_PROTO(unsigned int irq, bool is_legacy, int ret), + + TP_ARGS(irq, is_legacy, ret), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( bool, is_legacy ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->is_legacy = is_legacy; + __entry->ret = ret; + ), + + TP_printk("irq=%u is_legacy=%d ret=%d", + __entry->irq, __entry->is_legacy, __entry->ret) +); + +TRACE_EVENT(vector_free_moved, + + TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, + bool is_managed), + + TP_ARGS(irq, cpu, vector, is_managed), + + TP_STRUCT__entry( + __field( unsigned int, irq ) + __field( unsigned int, cpu ) + __field( unsigned int, vector ) + __field( bool, is_managed ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->cpu = cpu; + __entry->vector = vector; + __entry->is_managed = is_managed; + ), + + TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", + __entry->irq, __entry->cpu, __entry->vector, + __entry->is_managed) +); + + #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h deleted file mode 100644 index 78ccf28d17db..000000000000 --- a/arch/x86/include/asm/x2apic.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Common bits for X2APIC cluster/physical modes. - */ - -#ifndef _ASM_X86_X2APIC_H -#define _ASM_X86_X2APIC_H - -#include <asm/apic.h> -#include <asm/ipi.h> -#include <linux/cpumask.h> - -static int x2apic_apic_id_valid(int apicid) -{ - return 1; -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static void -__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) -{ - unsigned long cfg = __prepare_ICR(0, vector, dest); - native_x2apic_icr_write(cfg, apicid); -} - -static unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; -} - -static unsigned long x2apic_set_apic_id(unsigned int id) -{ - return id; -} - -static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - -static void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - -#endif /* _ASM_X86_X2APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8a1ebf9540dd..63d0eb250792 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,11 +51,13 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_init)(void); }; /** diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 2fb7309c6900..a9e08924927e 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -7,7 +7,7 @@ # In particualr, smp_apic_timer_interrupt() is called in random places. KCOV_INSTRUMENT := n -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ff891772c9f8..132bf45c943a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -211,11 +211,7 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { -#ifdef CONFIG_X86_64 - return 1; -#else return APIC_INTEGRATED(lapic_get_version()); -#endif } /* @@ -298,14 +294,11 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v; - - v = apic_read(APIC_LVR); /* * - we always have APIC integrated on 64bit mode * - 82489DXs do not report # of LVT entries */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; + return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; } /* @@ -1229,53 +1222,100 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) +enum apic_intr_mode_id apic_intr_mode; + +static int __init apic_intr_mode_select(void) { - unsigned int value; + /* Check kernel option */ + if (disable_apic) { + pr_info("APIC disabled via kernel command line\n"); + return APIC_PIC; + } - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) - return; + /* Check BIOS */ +#ifdef CONFIG_X86_64 + /* On 64-bit, the APIC must be integrated, Check local APIC only */ + if (!boot_cpu_has(X86_FEATURE_APIC)) { + disable_apic = 1; + pr_info("APIC disabled by BIOS\n"); + return APIC_PIC; + } +#else + /* On 32-bit, the APIC may be integrated APIC or 82489DX */ - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); + /* Neither 82489DX nor integrated APIC ? */ + if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { + disable_apic = 1; + return APIC_PIC; + } - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; + /* If the BIOS pretends there is an integrated APIC ? */ + if (!boot_cpu_has(X86_FEATURE_APIC) && + APIC_INTEGRATED(boot_cpu_apic_version)) { + disable_apic = 1; + pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", + boot_cpu_physical_apicid); + return APIC_PIC; + } +#endif -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else + /* Check MP table or ACPI MADT configuration */ + if (!smp_found_config) { + disable_ioapic_support(); + if (!acpi_lapic) { + pr_info("APIC: ACPI MADT or MP tables are not detected\n"); + return APIC_VIRTUAL_WIRE_NO_CONFIG; + } + return APIC_VIRTUAL_WIRE; + } + +#ifdef CONFIG_SMP + /* If SMP should be disabled, then really disable it! */ + if (!setup_max_cpus) { + pr_info("APIC: SMP mode deactivated\n"); + return APIC_SYMMETRIC_IO_NO_ROUTING; + } + + if (read_apic_id() != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", + read_apic_id(), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? */ + } #endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - if (apic_extnmi == APIC_EXTNMI_NONE) - value |= APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); + return APIC_SYMMETRIC_IO; +} + +/* Init the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_init(void) +{ + bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); + + apic_intr_mode = apic_intr_mode_select(); + + switch (apic_intr_mode) { + case APIC_PIC: + pr_info("APIC: Keep in PIC mode(8259)\n"); + return; + case APIC_VIRTUAL_WIRE: + pr_info("APIC: Switch to virtual wire mode setup\n"); + default_setup_apic_routing(); + break; + case APIC_VIRTUAL_WIRE_NO_CONFIG: + pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); + upmode = true; + default_setup_apic_routing(); + break; + case APIC_SYMMETRIC_IO: + pr_info("APIC: Switch to symmetric I/O mode setup\n"); + default_setup_apic_routing(); + break; + case APIC_SYMMETRIC_IO_NO_ROUTING: + pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); + break; + } + + apic_bsp_setup(upmode); } static void lapic_setup_esr(void) @@ -1499,7 +1539,9 @@ void setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!lapic_is_integrated()) /* 82489DX */ + + /* Is 82489DX ? */ + if (!lapic_is_integrated()) value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); @@ -1885,8 +1927,8 @@ void __init init_apic_mappings(void) * yeah -- we lie about apic_version * in case if apic was disabled via boot option * but it's not a problem for SMP compiled kernel - * since smp_sanity_check is prepared for such a case - * and disable smp mode + * since apic_intr_mode_select is prepared for such + * a case and disable smp mode */ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } @@ -2242,44 +2284,6 @@ int hard_smp_processor_id(void) return read_apic_id(); } -void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -int default_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) -{ - unsigned int cpu = cpumask_first(mask); - - if (cpu >= nr_cpu_ids) - return -EINVAL; - *apicid = per_cpu(x86_cpu_to_apicid, cpu); - irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); - return 0; -} - -int flat_cpu_mask_to_apicid(const struct cpumask *mask, - struct irq_data *irqdata, - unsigned int *apicid) - -{ - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; - - if (!cpu_mask) - return -EINVAL; - *apicid = (unsigned int)cpu_mask; - cpumask_bits(effmsk)[0] = cpu_mask; - return 0; -} - /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with @@ -2322,72 +2326,27 @@ static void __init apic_bsp_up_setup(void) * Returns: * apic_id of BSP APIC */ -int __init apic_bsp_setup(bool upmode) +void __init apic_bsp_setup(bool upmode) { - int id; - connect_bsp_APIC(); if (upmode) apic_bsp_up_setup(); setup_local_APIC(); - if (x2apic_mode) - id = apic_read(APIC_LDR); - else - id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - enable_IO_APIC(); end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); - /* Setup local timer */ - x86_init.timers.setup_percpu_clockev(); - return id; -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - pr_info("Apic disabled\n"); - return -1; - } -#ifdef CONFIG_X86_64 - if (!boot_cpu_has(X86_FEATURE_APIC)) { - disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC)) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!boot_cpu_has(X86_FEATURE_APIC) && - APIC_INTEGRATED(boot_cpu_apic_version)) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } -#endif - - if (!smp_found_config) - disable_ioapic_support(); - - default_setup_apic_routing(); - apic_bsp_setup(true); - return 0; } #ifdef CONFIG_UP_LATE_INIT void __init up_late_init(void) { - APIC_init_uniprocessor(); + if (apic_intr_mode == APIC_PIC) + return; + + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); } #endif diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c new file mode 100644 index 000000000000..a360801779ae --- /dev/null +++ b/arch/x86/kernel/apic/apic_common.c @@ -0,0 +1,46 @@ +/* + * Common functions shared between the various APIC flavours + * + * SPDX-License-Identifier: GPL-2.0 + */ +#include <linux/irq.h> +#include <asm/apic.h> + +u32 apic_default_calc_apicid(unsigned int cpu) +{ + return per_cpu(x86_cpu_to_apicid, cpu); +} + +u32 apic_flat_calc_apicid(unsigned int cpu) +{ + return 1U << cpu; +} + +bool default_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) +{ + *retmap = *phys_map; +} + +int default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} +EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); + +int default_check_phys_apicid_present(int phys_apicid) +{ + return physid_isset(phys_apicid, phys_cpu_present_map); +} + +int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +} diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index dedd5a41ba48..aa85690e9b64 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { return (id & 0xFF) << 24; } @@ -154,12 +154,10 @@ static struct apic apic_flat __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -172,7 +170,7 @@ static struct apic apic_flat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, @@ -249,12 +247,10 @@ static struct apic apic_physflat __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, @@ -268,7 +264,7 @@ static struct apic apic_physflat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index c8d211277315..7b659c4480c9 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -84,20 +84,6 @@ static int noop_apic_id_registered(void) return physid_isset(0, phys_cpu_present_map); } -static const struct cpumask *noop_target_cpus(void) -{ - /* only BSP here */ - return cpumask_of(0); -} - -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - if (cpu != 0) - pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_copy(retmask, cpumask_of(cpu)); -} - static u32 noop_apic_read(u32 reg) { WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); @@ -109,6 +95,13 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } +#ifdef CONFIG_X86_32 +static int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} +#endif + struct apic apic_noop __ro_after_init = { .name = "noop", .probe = noop_probe, @@ -121,12 +114,10 @@ struct apic apic_noop __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -142,7 +133,7 @@ struct apic apic_noop __ro_after_init = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 2fda912219a6..134e04506ab4 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) return id; } -static unsigned long numachip1_set_apic_id(unsigned int id) +static u32 numachip1_set_apic_id(unsigned int id) { return (id & 0xff) << 24; } @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static unsigned long numachip2_set_apic_id(unsigned int id) +static u32 numachip2_set_apic_id(unsigned int id) { return id << 24; } @@ -249,12 +249,10 @@ static const struct apic apic_numachip1 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -267,7 +265,7 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, @@ -300,12 +298,10 @@ static const struct apic apic_numachip2 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -318,7 +314,7 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index e12fbcfc9571..afee386ff711 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -27,9 +27,9 @@ static int bigsmp_apic_id_registered(void) return 1; } -static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) +static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { - return 0; + return false; } static int bigsmp_early_logical_apicid(int cpu) @@ -155,12 +155,10 @@ static struct apic apic_bigsmp __ro_after_init = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, @@ -173,7 +171,7 @@ static struct apic apic_bigsmp __ro_after_init = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index 56ccf9346b08..b07075dce8b7 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -112,8 +112,8 @@ static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } -static void htirq_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int htirq_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { struct ht_irq_msg msg; struct irq_cfg *cfg = irqd_cfg(irq_data); @@ -132,6 +132,7 @@ static void htirq_domain_activate(struct irq_domain *domain, HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; write_ht_irq_msg(irq_data->irq, &msg); + return 0; } static void htirq_domain_deactivate(struct irq_domain *domain, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3b89b27945ff..201579dc5242 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1014,6 +1014,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { + info->flags |= X86_IRQ_ALLOC_LEGACY; irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL); if (irq >= 0) { @@ -1586,6 +1587,43 @@ static int __init notimercheck(char *s) } __setup("no_timer_check", notimercheck); +static void __init delay_with_tsc(void) +{ + unsigned long long start, now; + unsigned long end = jiffies + 4; + + start = rdtsc(); + + /* + * We don't know the TSC frequency yet, but waiting for + * 40000000000/HZ TSC cycles is safe: + * 4 GHz == 10 jiffies + * 1 GHz == 40 jiffies + */ + do { + rep_nop(); + now = rdtsc(); + } while ((now - start) < 40000000000UL / HZ && + time_before_eq(jiffies, end)); +} + +static void __init delay_without_tsc(void) +{ + unsigned long end = jiffies + 4; + int band = 1; + + /* + * We don't know any frequency yet, but waiting for + * 40940000000/HZ cycles is safe: + * 4 GHz == 10 jiffies + * 1 GHz == 40 jiffies + * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 + */ + do { + __delay(((1U << band++) * 10000000UL) / HZ); + } while (band < 12 && time_before_eq(jiffies, end)); +} + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1604,8 +1642,12 @@ static int __init timer_irq_works(void) local_save_flags(flags); local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); + + if (boot_cpu_has(X86_FEATURE_TSC)) + delay_with_tsc(); + else + delay_without_tsc(); + local_irq_restore(flags); /* @@ -1821,26 +1863,36 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(data->entry.vector, data); } +static void ioapic_configure_entry(struct irq_data *irqd) +{ + struct mp_chip_data *mpd = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + struct irq_pin_list *entry; + + /* + * Only update when the parent is the vector domain, don't touch it + * if the parent is the remapping domain. Check the installed + * ioapic chip to verify that. + */ + if (irqd->chip == &ioapic_chip) { + mpd->entry.dest = cfg->dest_apicid; + mpd->entry.vector = cfg->vector; + } + for_each_irq_pin(entry, mpd->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); +} + static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { struct irq_data *parent = irq_data->parent_data; - struct mp_chip_data *data = irq_data->chip_data; - struct irq_pin_list *entry; - struct irq_cfg *cfg; unsigned long flags; int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); raw_spin_lock_irqsave(&ioapic_lock, flags); - if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { - cfg = irqd_cfg(irq_data); - data->entry.dest = cfg->dest_apicid; - data->entry.vector = cfg->vector; - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, - data->entry); - } + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; @@ -2097,7 +2149,7 @@ static inline void __init check_timer(void) unmask_ioapic_irq(irq_get_irq_data(0)); } irq_domain_deactivate_irq(irq_data); - irq_domain_activate_irq(irq_data); + irq_domain_activate_irq(irq_data, false); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -2119,7 +2171,7 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); irq_domain_deactivate_irq(irq_data); - irq_domain_activate_irq(irq_data); + irq_domain_activate_irq(irq_data, false); legacy_pic->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2513,52 +2565,9 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) } /* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be apic->target_cpus() + * This function updates target affinity of IOAPIC interrupts to include + * the CPUs which came online during SMP bringup. */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - const struct cpumask *mask; - struct irq_desc *desc; - struct irq_data *idata; - struct irq_chip *chip; - - if (skip_ioapic_setup == 1) - return; - - for_each_ioapic_pin(ioapic, pin) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - - irq = pin_2_irq(irq_entry, ioapic, pin, 0); - if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) - continue; - - desc = irq_to_desc(irq); - raw_spin_lock_irq(&desc->lock); - idata = irq_desc_get_irq_data(desc); - - /* - * Honour affinities which have been set in early boot - */ - if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) - mask = irq_data_get_affinity_mask(idata); - else - mask = apic->target_cpus(); - - chip = irq_data_get_irq_chip(idata); - /* Might be lapic_chip for irq 0 */ - if (chip->irq_set_affinity) - chip->irq_set_affinity(idata, mask, false); - raw_spin_unlock_irq(&desc->lock); - } -} -#endif - #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -2978,17 +2987,15 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } -void mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +int mp_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { unsigned long flags; - struct irq_pin_list *entry; - struct mp_chip_data *data = irq_data->chip_data; raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, data->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, data->entry); + ioapic_configure_entry(irq_data); raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return 0; } void mp_irqdomain_deactivate(struct irq_domain *domain, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 63287659adb6..fa22017de806 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,6 +66,31 @@ static void setup_apic_flat_routing(void) #endif } +static int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +/* + * Set up the logical destination ID. Intel recommends to set DFR, LDR and + * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual" + * (Intel document number 292116). + */ +static void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +static int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + /* should be called last. */ static int probe_default(void) { @@ -84,12 +109,10 @@ static struct apic apic_default __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -102,7 +125,7 @@ static struct apic apic_default __ro_after_init = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 88c214e75a6b..05c85e693a5d 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation. */ #include <linux/interrupt.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/compiler.h> #include <linux/slab.h> @@ -21,20 +22,30 @@ #include <asm/desc.h> #include <asm/irq_remapping.h> +#include <asm/trace/irq_vectors.h> + struct apic_chip_data { - struct irq_cfg cfg; - cpumask_var_t domain; - cpumask_var_t old_domain; - u8 move_in_progress : 1; + struct irq_cfg hw_irq_cfg; + unsigned int vector; + unsigned int prev_vector; + unsigned int cpu; + unsigned int prev_cpu; + unsigned int irq; + struct hlist_node clist; + unsigned int move_in_progress : 1, + is_managed : 1, + can_reserve : 1, + has_reserved : 1; }; struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; +static cpumask_var_t vector_searchmask; static struct irq_chip lapic_controller; -#ifdef CONFIG_X86_IO_APIC -static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; +static struct irq_matrix *vector_matrix; +#ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct hlist_head, cleanup_list); #endif void lock_vector_lock(void) @@ -50,22 +61,37 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) +{ + memset(info, 0, sizeof(*info)); + info->mask = mask; +} + +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) { - if (!irq_data) + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); +} + +static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) +{ + if (!irqd) return NULL; - while (irq_data->parent_data) - irq_data = irq_data->parent_data; + while (irqd->parent_data) + irqd = irqd->parent_data; - return irq_data->chip_data; + return irqd->chip_data; } -struct irq_cfg *irqd_cfg(struct irq_data *irq_data) +struct irq_cfg *irqd_cfg(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); - return data ? &data->cfg : NULL; + return apicd ? &apicd->hw_irq_cfg : NULL; } EXPORT_SYMBOL_GPL(irqd_cfg); @@ -76,270 +102,395 @@ struct irq_cfg *irq_cfg(unsigned int irq) static struct apic_chip_data *alloc_apic_chip_data(int node) { - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); - if (!data) - return NULL; - if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) - goto out_data; - if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) - goto out_domain; - return data; -out_domain: - free_cpumask_var(data->domain); -out_data: - kfree(data); - return NULL; -} - -static void free_apic_chip_data(struct apic_chip_data *data) -{ - if (data) { - free_cpumask_var(data->domain); - free_cpumask_var(data->old_domain); - kfree(data); + apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); + if (apicd) + INIT_HLIST_NODE(&apicd->clist); + return apicd; +} + +static void free_apic_chip_data(struct apic_chip_data *apicd) +{ + kfree(apicd); +} + +static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, + unsigned int cpu) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + + lockdep_assert_held(&vector_lock); + + apicd->hw_irq_cfg.vector = vector; + apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); + irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); + trace_vector_config(irqd->irq, vector, cpu, + apicd->hw_irq_cfg.dest_apicid); +} + +static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, + unsigned int newcpu) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + struct irq_desc *desc = irq_data_to_desc(irqd); + + lockdep_assert_held(&vector_lock); + + trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, + apicd->cpu); + + /* Setup the vector move, if required */ + if (apicd->vector && cpu_online(apicd->cpu)) { + apicd->move_in_progress = true; + apicd->prev_vector = apicd->vector; + apicd->prev_cpu = apicd->cpu; + } else { + apicd->prev_vector = 0; } + + apicd->vector = newvec; + apicd->cpu = newcpu; + BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); + per_cpu(vector_irq, newcpu)[newvec] = desc; } -static int __assign_irq_vector(int irq, struct apic_chip_data *d, - const struct cpumask *mask, - struct irq_data *irqdata) +static void vector_assign_managed_shutdown(struct irq_data *irqd) { - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, vector; + unsigned int cpu = cpumask_first(cpu_online_mask); - /* - * If there is still a move in progress or the previous move has not - * been cleaned up completely, tell the caller to come back later. - */ - if (d->move_in_progress || - cpumask_intersects(d->old_domain, cpu_online_mask)) - return -EBUSY; + apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); +} - /* Only try and allocate irqs on cpus that are present */ - cpumask_clear(d->old_domain); - cpumask_clear(searched_cpumask); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, offset; +static int reserve_managed_vector(struct irq_data *irqd) +{ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret; - /* Get the possible target cpus for @mask/@cpu from the apic */ - apic->vector_allocation_domain(cpu, vector_cpumask, mask); + raw_spin_lock_irqsave(&vector_lock, flags); + apicd->is_managed = true; + ret = irq_matrix_reserve_managed(vector_matrix, affmsk); + raw_spin_unlock_irqrestore(&vector_lock, flags); + trace_vector_reserve_managed(irqd->irq, ret); + return ret; +} - /* - * Clear the offline cpus from @vector_cpumask for searching - * and verify whether the result overlaps with @mask. If true, - * then the call to apic->cpu_mask_to_apicid() will - * succeed as well. If not, no point in trying to find a - * vector in this mask. - */ - cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); - if (!cpumask_intersects(vector_searchmask, mask)) - goto next_cpu; - - if (cpumask_subset(vector_cpumask, d->domain)) { - if (cpumask_equal(vector_cpumask, d->domain)) - goto success; - /* - * Mark the cpus which are not longer in the mask for - * cleanup. - */ - cpumask_andnot(d->old_domain, d->domain, vector_cpumask); - vector = d->cfg.vector; - goto update; - } +static void reserve_irq_vector_locked(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } + irq_matrix_reserve(vector_matrix); + apicd->can_reserve = true; + apicd->has_reserved = true; + trace_vector_reserve(irqd->irq, 0); + vector_assign_managed_shutdown(irqd); +} - /* If the search wrapped around, try the next cpu */ - if (unlikely(current_vector == vector)) - goto next_cpu; +static int reserve_irq_vector(struct irq_data *irqd) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + reserve_irq_vector_locked(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} - if (test_bit(vector, used_vectors)) - goto next; +static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool resvd = apicd->has_reserved; + unsigned int cpu = apicd->cpu; + int vector = apicd->vector; - for_each_cpu(new_cpu, vector_searchmask) { - if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - /* Schedule the old vector for cleanup on all cpus */ - if (d->cfg.vector) - cpumask_copy(d->old_domain, d->domain); - for_each_cpu(new_cpu, vector_searchmask) - per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - goto update; - -next_cpu: - /* - * We exclude the current @vector_cpumask from the requested - * @mask and try again with the next online cpu in the - * result. We cannot modify @mask, so we use @vector_cpumask - * as a temporary buffer here as it will be reassigned when - * calling apic->vector_allocation_domain() above. - */ - cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); - continue; - } - return -ENOSPC; + lockdep_assert_held(&vector_lock); -update: /* - * Exclude offline cpus from the cleanup mask and set the - * move_in_progress flag when the result is not empty. + * If the current target CPU is online and in the new requested + * affinity mask, there is no point in moving the interrupt from + * one CPU to another. */ - cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); - d->move_in_progress = !cpumask_empty(d->old_domain); - d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); -success: - /* - * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail - * as we already established, that mask & d->domain & cpu_online_mask - * is not empty. - * - * vector_searchmask is a subset of d->domain and has the offline - * cpus masked out. - */ - cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, - &d->cfg.dest_apicid)); + if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) + return 0; + + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); + if (vector > 0) + apic_update_vector(irqd, vector, cpu); + trace_vector_alloc(irqd->irq, vector, resvd, vector); + return vector; +} + +static int assign_vector_locked(struct irq_data *irqd, + const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector = allocate_vector(irqd, dest); + + if (vector < 0) + return vector; + + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); return 0; } -static int assign_irq_vector(int irq, struct apic_chip_data *data, - const struct cpumask *mask, - struct irq_data *irqdata) +static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) { - int err; unsigned long flags; + int ret; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, data, mask, irqdata); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + ret = assign_vector_locked(irqd, vector_searchmask); raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; + return ret; } -static int assign_irq_vector_policy(int irq, int node, - struct apic_chip_data *data, - struct irq_alloc_info *info, - struct irq_data *irqdata) +static int assign_irq_vector_any_locked(struct irq_data *irqd) { - if (info && info->mask) - return assign_irq_vector(irq, data, info->mask, irqdata); - if (node != NUMA_NO_NODE && - assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) + /* Get the affinity mask - either irq_default_affinity or (user) set */ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + int node = irq_data_get_node(irqd); + + if (node == NUMA_NO_NODE) + goto all; + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) return 0; - return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); +all: + /* Try the full affinity mask */ + cpumask_and(vector_searchmask, affmsk, cpu_online_mask); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the full online mask */ + return assign_vector_locked(irqd, cpu_online_mask); +} + +static int +assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) +{ + if (irqd_affinity_is_managed(irqd)) + return reserve_managed_vector(irqd); + if (info->mask) + return assign_irq_vector(irqd, info->mask); + /* + * Make only a global reservation with no guarantee. A real vector + * is associated at activation time. + */ + return reserve_irq_vector(irqd); } -static void clear_irq_vector(int irq, struct apic_chip_data *data) +static int +assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) { - struct irq_desc *desc; - int cpu, vector; + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector, cpu; - if (!data->cfg.vector) + cpumask_and(vector_searchmask, vector_searchmask, affmsk); + cpu = cpumask_first(vector_searchmask); + if (cpu >= nr_cpu_ids) + return -EINVAL; + /* set_affinity might call here for nothing */ + if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) + return 0; + vector = irq_matrix_alloc_managed(vector_matrix, cpu); + trace_vector_alloc_managed(irqd->irq, vector, vector); + if (vector < 0) + return vector; + apic_update_vector(irqd, vector, cpu); + apic_update_irq_cfg(irqd, vector, cpu); + return 0; +} + +static void clear_irq_vector(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool managed = irqd_affinity_is_managed(irqd); + unsigned int vector = apicd->vector; + + lockdep_assert_held(&vector_lock); + + if (!vector) return; - vector = data->cfg.vector; - for_each_cpu_and(cpu, data->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, + apicd->prev_cpu); - data->cfg.vector = 0; - cpumask_clear(data->domain); + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apicd->vector = 0; - /* - * If move is in progress or the old_domain mask is not empty, - * i.e. the cleanup IPI has not been processed yet, we need to remove - * the old references to desc from all cpus vector tables. - */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) + /* Clean up move in progress */ + vector = apicd->prev_vector; + if (!vector) return; - desc = irq_to_desc(irq); - for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { - if (per_cpu(vector_irq, cpu)[vector] != desc) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - break; - } + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; + hlist_del_init(&apicd->clist); +} + +static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + + trace_vector_deactivate(irqd->irq, apicd->is_managed, + apicd->can_reserve, false); + + /* Regular fixed assigned interrupt */ + if (!apicd->is_managed && !apicd->can_reserve) + return; + /* If the interrupt has a global reservation, nothing to do */ + if (apicd->has_reserved) + return; + + raw_spin_lock_irqsave(&vector_lock, flags); + clear_irq_vector(irqd); + if (apicd->can_reserve) + reserve_irq_vector_locked(irqd); + else + vector_assign_managed_shutdown(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +static int activate_reserved(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int ret; + + ret = assign_irq_vector_any_locked(irqd); + if (!ret) + apicd->has_reserved = false; + return ret; +} + +static int activate_managed(struct irq_data *irqd) +{ + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + int ret; + + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { + /* Something in the core code broke! Survive gracefully */ + pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); + return EINVAL; + } + + ret = assign_managed_vector(irqd, vector_searchmask); + /* + * This should not happen. The vector reservation got buggered. Handle + * it gracefully. + */ + if (WARN_ON_ONCE(ret < 0)) { + pr_err("Managed startup irq %u, no vector available\n", + irqd->irq); } - data->move_in_progress = 0; + return ret; } -void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask) +static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, + bool early) { - memset(info, 0, sizeof(*info)); - info->mask = mask; + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret = 0; + + trace_vector_activate(irqd->irq, apicd->is_managed, + apicd->can_reserve, early); + + /* Nothing to do for fixed assigned vectors */ + if (!apicd->can_reserve && !apicd->is_managed) + return 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + if (early || irqd_is_managed_and_shutdown(irqd)) + vector_assign_managed_shutdown(irqd); + else if (apicd->is_managed) + ret = activate_managed(irqd); + else if (apicd->has_reserved) + ret = activate_reserved(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return ret; } -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +static void vector_free_reserved_and_managed(struct irq_data *irqd) { - if (src) - *dst = *src; - else - memset(dst, 0, sizeof(*dst)); + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + + trace_vector_teardown(irqd->irq, apicd->is_managed, + apicd->has_reserved); + + if (apicd->has_reserved) + irq_matrix_remove_reserved(vector_matrix); + if (apicd->is_managed) + irq_matrix_remove_managed(vector_matrix, dest); } static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct apic_chip_data *apic_data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); - if (irq_data && irq_data->chip_data) { + irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(virq + i, irq_data->chip_data); - apic_data = irq_data->chip_data; - irq_domain_reset_irq_data(irq_data); + clear_irq_vector(irqd); + vector_free_reserved_and_managed(irqd); + apicd = irqd->chip_data; + irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apic_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs()) - legacy_irq_data[virq + i] = NULL; -#endif + free_apic_chip_data(apicd); } } } +static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, + struct apic_chip_data *apicd) +{ + unsigned long flags; + bool realloc = false; + + apicd->vector = ISA_IRQ_VECTOR(virq); + apicd->cpu = 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + /* + * If the interrupt is activated, then it must stay at this vector + * position. That's usually the timer interrupt (0). + */ + if (irqd_is_activated(irqd)) { + trace_vector_setup(virq, true, 0); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + } else { + /* Release the vector */ + apicd->can_reserve = true; + clear_irq_vector(irqd); + realloc = true; + } + raw_spin_unlock_irqrestore(&vector_lock, flags); + return realloc; +} + static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; - struct apic_chip_data *data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; int i, err, node; if (disable_apic) @@ -350,34 +501,37 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return -ENOSYS; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(domain, virq + i); - BUG_ON(!irq_data); - node = irq_data_get_node(irq_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) - data = legacy_irq_data[virq + i]; - else -#endif - data = alloc_apic_chip_data(node); - if (!data) { + irqd = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irqd); + node = irq_data_get_node(irqd); + WARN_ON_ONCE(irqd->chip_data); + apicd = alloc_apic_chip_data(node); + if (!apicd) { err = -ENOMEM; goto error; } - irq_data->chip = &lapic_controller; - irq_data->chip_data = data; - irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq + i, node, data, info, - irq_data); - if (err) - goto error; + apicd->irq = virq + i; + irqd->chip = &lapic_controller; + irqd->chip_data = apicd; + irqd->hwirq = virq + i; + irqd_set_single_target(irqd); /* - * If the apic destination mode is physical, then the - * effective affinity is restricted to a single target - * CPU. Mark the interrupt accordingly. + * Legacy vectors are already assigned when the IOAPIC + * takes them over. They stay on the same vector. This is + * required for check_timer() to work correctly as it might + * switch back to legacy mode. Only update the hardware + * config. */ - if (!apic->irq_dest_mode) - irqd_set_single_target(irq_data); + if (info->flags & X86_IRQ_ALLOC_LEGACY) { + if (!vector_configure_legacy(virq + i, irqd, apicd)) + continue; + } + + err = assign_irq_vector_policy(irqd, info); + trace_vector_setup(virq + i, false, err); + if (err) + goto error; } return 0; @@ -387,9 +541,56 @@ error: return err; } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + unsigned int cpu, vector, prev_cpu, prev_vector; + struct apic_chip_data *apicd; + unsigned long flags; + int irq; + + if (!irqd) { + irq_matrix_debug_show(m, vector_matrix, ind); + return; + } + + irq = irqd->irq; + if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { + seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); + seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); + return; + } + + apicd = irqd->chip_data; + if (!apicd) { + seq_printf(m, "%*sVector: Not assigned\n", ind, ""); + return; + } + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = apicd->cpu; + vector = apicd->vector; + prev_cpu = apicd->prev_cpu; + prev_vector = apicd->prev_vector; + raw_spin_unlock_irqrestore(&vector_lock, flags); + seq_printf(m, "%*sVector: %5u\n", ind, "", vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); + if (prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + } +} +#endif + static const struct irq_domain_ops x86_vector_domain_ops = { - .alloc = x86_vector_alloc_irqs, - .free = x86_vector_free_irqs, + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, + .activate = x86_vector_activate, + .deactivate = x86_vector_deactivate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = x86_vector_debug_show, +#endif }; int __init arch_probe_nr_irqs(void) @@ -419,35 +620,40 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } -#ifdef CONFIG_X86_IO_APIC -static void __init init_legacy_irqs(void) +void lapic_assign_legacy_vector(unsigned int irq, bool replace) { - int i, node = cpu_to_node(0); - struct apic_chip_data *data; - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * ISA_IRQ_VECTOR(i) for all cpu's. + * Use assign system here so it wont get accounted as allocated + * and moveable in the cpu hotplug check and it prevents managed + * irq reservation from touching it. */ - for (i = 0; i < nr_legacy_irqs(); i++) { - data = legacy_irq_data[i] = alloc_apic_chip_data(node); - BUG_ON(!data); + irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); +} + +void __init lapic_assign_system_vectors(void) +{ + unsigned int i, vector = 0; - data->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_setall(data->domain); - irq_set_chip_data(i, data); + for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + irq_matrix_assign_system(vector_matrix, vector, false); + + if (nr_legacy_irqs() > 1) + lapic_assign_legacy_vector(PIC_CASCADE_IR, false); + + /* System vectors are reserved, online it */ + irq_matrix_online(vector_matrix); + + /* Mark the preallocated legacy interrupts */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); } } -#else -static inline void init_legacy_irqs(void) { } -#endif int __init arch_early_irq_init(void) { struct fwnode_handle *fn; - init_legacy_irqs(); - fn = irq_domain_alloc_named_fwnode("VECTOR"); BUG_ON(!fn); x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, @@ -459,100 +665,115 @@ int __init arch_early_irq_init(void) arch_init_msi_domain(x86_vector_domain); arch_init_htirq_domain(x86_vector_domain); - BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); - BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); + + /* + * Allocate the vector matrix allocator data structure and limit the + * search area. + */ + vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, + FIRST_SYSTEM_VECTOR); + BUG_ON(!vector_matrix); return arch_early_ioapic_init(); } -/* Initialize vector_irq on a new cpu */ -static void __setup_vector_irq(int cpu) +#ifdef CONFIG_SMP + +static struct irq_desc *__setup_vector_irq(int vector) { - struct apic_chip_data *data; - struct irq_desc *desc; - int irq, vector; + int isairq = vector - ISA_IRQ_VECTOR(0); + + /* Check whether the irq is in the legacy space */ + if (isairq < 0 || isairq >= nr_legacy_irqs()) + return VECTOR_UNUSED; + /* Check whether the irq is handled by the IOAPIC */ + if (test_bit(isairq, &io_apic_irqs)) + return VECTOR_UNUSED; + return irq_to_desc(isairq); +} - /* Mark the inuse vectors */ - for_each_irq_desc(irq, desc) { - struct irq_data *idata = irq_desc_get_irq_data(desc); +/* Online the local APIC infrastructure and initialize the vectors */ +void lapic_online(void) +{ + unsigned int vector; - data = apic_chip_data(idata); - if (!data || !cpumask_test_cpu(cpu, data->domain)) - continue; - vector = data->cfg.vector; - per_cpu(vector_irq, cpu)[vector] = desc; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - desc = per_cpu(vector_irq, cpu)[vector]; - if (IS_ERR_OR_NULL(desc)) - continue; + lockdep_assert_held(&vector_lock); - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!cpumask_test_cpu(cpu, data->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - } + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); + + /* + * The interrupt affinity logic never targets interrupts to offline + * CPUs. The exception are the legacy PIC interrupts. In general + * they are only targeted to CPU0, but depending on the platform + * they can be distributed to any online CPU in hardware. The + * kernel has no influence on that. So all active legacy vectors + * must be installed on all CPUs. All non legacy interrupts can be + * cleared. + */ + for (vector = 0; vector < NR_VECTORS; vector++) + this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); } -/* - * Setup the vector to irq mappings. Must be called with vector_lock held. - */ -void setup_vector_irq(int cpu) +void lapic_offline(void) { - int irq; + lock_vector_lock(); + irq_matrix_offline(vector_matrix); + unlock_vector_lock(); +} + +static int apic_set_affinity(struct irq_data *irqd, + const struct cpumask *dest, bool force) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int err; - lockdep_assert_held(&vector_lock); /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: + * Core code can call here for inactive interrupts. For inactive + * interrupts which use managed or reservation mode there is no + * point in going through the vector assignment right now as the + * activation will assign a vector which fits the destination + * cpumask. Let the core code store the destination mask and be + * done with it. */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); + if (!irqd_is_activated(irqd) && + (apicd->is_managed || apicd->can_reserve)) + return IRQ_SET_MASK_OK; - __setup_vector_irq(cpu); + raw_spin_lock(&vector_lock); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (irqd_affinity_is_managed(irqd)) + err = assign_managed_vector(irqd, vector_searchmask); + else + err = assign_vector_locked(irqd, vector_searchmask); + raw_spin_unlock(&vector_lock); + return err ? err : IRQ_SET_MASK_OK; } -static int apic_retrigger_irq(struct irq_data *irq_data) +#else +# define apic_set_affinity NULL +#endif + +static int apic_retrigger_irq(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; - int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(data->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); + apic->send_IPI(apicd->cpu, apicd->vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } -void apic_ack_edge(struct irq_data *data) +void apic_ack_edge(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(data)); - irq_move_irq(data); + irq_complete_move(irqd_cfg(irqd)); + irq_move_irq(irqd); ack_APIC_irq(); } -static int apic_set_affinity(struct irq_data *irq_data, - const struct cpumask *dest, bool force) -{ - struct apic_chip_data *data = irq_data->chip_data; - int err, irq = irq_data->irq; - - if (!IS_ENABLED(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(dest, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, data, dest, irq_data); - return err ? err : IRQ_SET_MASK_OK; -} - static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, @@ -561,115 +782,98 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP -static void __send_cleanup_vector(struct apic_chip_data *data) -{ - raw_spin_lock(&vector_lock); - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - data->move_in_progress = 0; - if (!cpumask_empty(data->old_domain)) - apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); - raw_spin_unlock(&vector_lock); -} -void send_cleanup_vector(struct irq_cfg *cfg) +static void free_moved_vector(struct apic_chip_data *apicd) { - struct apic_chip_data *data; + unsigned int vector = apicd->prev_vector; + unsigned int cpu = apicd->prev_cpu; + bool managed = apicd->is_managed; - data = container_of(cfg, struct apic_chip_data, cfg); - if (data->move_in_progress) - __send_cleanup_vector(data); + /* + * This should never happen. Managed interrupts are not + * migrated except on CPU down, which does not involve the + * cleanup vector. But try to keep the accounting correct + * nevertheless. + */ + WARN_ON_ONCE(managed); + + trace_vector_free_moved(apicd->irq, cpu, vector, managed); + irq_matrix_free(vector_matrix, cpu, vector, managed); + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + hlist_del_init(&apicd->clist); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; } asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { - unsigned vector, me; + struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); + struct apic_chip_data *apicd; + struct hlist_node *tmp; entering_ack_irq(); - /* Prevent vectors vanishing under us */ raw_spin_lock(&vector_lock); - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - struct apic_chip_data *data; - struct irq_desc *desc; - unsigned int irr; - - retry: - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - - if (!raw_spin_trylock(&desc->lock)) { - raw_spin_unlock(&vector_lock); - cpu_relax(); - raw_spin_lock(&vector_lock); - goto retry; - } - - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!data) - goto unlock; + hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { + unsigned int irr, vector = apicd->prev_vector; /* - * Nothing to cleanup if irq migration is in progress - * or this cpu is not set in the cleanup mask. - */ - if (data->move_in_progress || - !cpumask_test_cpu(me, data->old_domain)) - goto unlock; - - /* - * We have two cases to handle here: - * 1) vector is unchanged but the target mask got reduced - * 2) vector and the target mask has changed - * - * #1 is obvious, but in #2 we have two vectors with the same - * irq descriptor: the old and the new vector. So we need to - * make sure that we only cleanup the old vector. The new - * vector has the current @vector number in the config and - * this cpu is part of the target mask. We better leave that - * one alone. - */ - if (vector == data->cfg.vector && - cpumask_test_cpu(me, data->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the + * Paranoia: Check if the vector that needs to be cleaned + * up is registered at the APICs IRR. If so, then this is + * not the best time to clean it up. Clean it up in the * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. + * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest + * priority external vector, so on return from this + * interrupt the device interrupt will happen first. */ - if (irr & (1 << (vector % 32))) { + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1U << (vector % 32))) { apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; + continue; } - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - cpumask_clear_cpu(me, data->old_domain); -unlock: - raw_spin_unlock(&desc->lock); + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); - exiting_irq(); } +static void __send_cleanup_vector(struct apic_chip_data *apicd) +{ + unsigned int cpu; + + raw_spin_lock(&vector_lock); + apicd->move_in_progress = 0; + cpu = apicd->prev_cpu; + if (cpu_online(cpu)) { + hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); + apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); + } else { + apicd->prev_vector = 0; + } + raw_spin_unlock(&vector_lock); +} + +void send_cleanup_vector(struct irq_cfg *cfg) +{ + struct apic_chip_data *apicd; + + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (apicd->move_in_progress) + __send_cleanup_vector(apicd); +} + static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { - unsigned me; - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = container_of(cfg, struct apic_chip_data, cfg); - if (likely(!data->move_in_progress)) + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (likely(!apicd->move_in_progress)) return; - me = smp_processor_id(); - if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) - __send_cleanup_vector(data); + if (vector == apicd->vector && apicd->cpu == smp_processor_id()) + __send_cleanup_vector(apicd); } void irq_complete_move(struct irq_cfg *cfg) @@ -682,10 +886,9 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct irq_data *irqdata; - struct apic_chip_data *data; - struct irq_cfg *cfg; - unsigned int cpu; + struct apic_chip_data *apicd; + struct irq_data *irqd; + unsigned int vector; /* * The function is called for all descriptors regardless of which @@ -696,43 +899,31 @@ void irq_force_complete_move(struct irq_desc *desc) * Check first that the chip_data is what we expect * (apic_chip_data) before touching it any further. */ - irqdata = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqdata) + irqd = irq_domain_get_irq_data(x86_vector_domain, + irq_desc_get_irq(desc)); + if (!irqd) return; - data = apic_chip_data(irqdata); - cfg = data ? &data->cfg : NULL; + raw_spin_lock(&vector_lock); + apicd = apic_chip_data(irqd); + if (!apicd) + goto unlock; - if (!cfg) - return; + /* + * If prev_vector is empty, no action required. + */ + vector = apicd->prev_vector; + if (!vector) + goto unlock; /* - * This is tricky. If the cleanup of @data->old_domain has not been + * This is tricky. If the cleanup of the old vector has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * * All CPUs are stuck in stop machine with interrupts disabled so * calling __irq_complete_move() would be completely pointless. - */ - raw_spin_lock(&vector_lock); - /* - * Clean out all offline cpus (including the outgoing one) from the - * old_domain mask. - */ - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - - /* - * If move_in_progress is cleared and the old_domain mask is empty, - * then there is nothing to cleanup. fixup_irqs() will take care of - * the stale vectors on the outgoing cpu. - */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) { - raw_spin_unlock(&vector_lock); - return; - } - - /* + * * 1) The interrupt is in move_in_progress state. That means that we * have not seen an interrupt since the io_apic was reprogrammed to * the new vector. @@ -740,7 +931,7 @@ void irq_force_complete_move(struct irq_desc *desc) * 2) The interrupt has fired on the new vector, but the cleanup IPIs * have not been processed yet. */ - if (data->move_in_progress) { + if (apicd->move_in_progress) { /* * In theory there is a race: * @@ -774,21 +965,43 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqdata->irq, cfg->old_vector); + irqd->irq, vector); } - /* - * If old_domain is not empty, then other cpus still have the irq - * descriptor set in their vector array. Clean it up. - */ - for_each_cpu(cpu, data->old_domain) - per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; + free_moved_vector(apicd); +unlock: + raw_spin_unlock(&vector_lock); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * Note, this is not accurate accounting, but at least good enough to + * prevent that the actual interrupt move will run out of vectors. + */ +int lapic_can_unplug_cpu(void) +{ + unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); + int ret = 0; - /* Cleanup the left overs of the (half finished) move */ - cpumask_clear(data->old_domain); - data->move_in_progress = 0; + raw_spin_lock(&vector_lock); + tomove = irq_matrix_allocated(vector_matrix); + avl = irq_matrix_available(vector_matrix, true); + if (avl < tomove) { + pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", + cpu, tomove, avl); + ret = -ENOSPC; + goto out; + } + rsvd = irq_matrix_reserved(vector_matrix); + if (avl < rsvd) { + pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", + rsvd, avl); + } +out: raw_spin_unlock(&vector_lock); + return ret; } -#endif +#endif /* HOTPLUG_CPU */ +#endif /* SMP */ static void __init print_APIC_field(int base) { diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h new file mode 100644 index 000000000000..b107de381cb5 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic.h @@ -0,0 +1,9 @@ +/* Common bits for X2APIC cluster/physical modes. */ + +int x2apic_apic_id_valid(int apicid); +int x2apic_apic_id_registered(void); +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); +unsigned int x2apic_get_apic_id(unsigned long id); +u32 x2apic_set_apic_id(unsigned int id); +int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e216cf3d64d2..622f13ca8a94 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,22 +9,24 @@ #include <linux/cpu.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include "x2apic.h" + +struct cluster_mask { + unsigned int clusterid; + int node; + struct cpumask mask; +}; static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); +static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); +static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } -static inline u32 x2apic_cluster(int cpu) -{ - return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; -} - static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -36,49 +38,34 @@ static void x2apic_send_IPI(int cpu, int vector) static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - struct cpumask *cpus_in_cluster_ptr; - struct cpumask *ipi_mask_ptr; - unsigned int cpu, this_cpu; + unsigned int cpu, clustercpu; + struct cpumask *tmpmsk; unsigned long flags; u32 dest; x2apic_wrmsr_fence(); - local_irq_save(flags); - this_cpu = smp_processor_id(); + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); + /* If IPI should not be sent to self, clear current CPU */ + if (apic_dest != APIC_DEST_ALLINC) + cpumask_clear_cpu(smp_processor_id(), tmpmsk); - /* - * We are to modify mask, so we need an own copy - * and be sure it's manipulated with irq off. - */ - ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); - - /* - * The idea is to send one IPI per cluster. - */ - for_each_cpu(cpu, ipi_mask_ptr) { - unsigned long i; + /* Collapse cpus in a cluster so a single IPI per cluster is sent */ + for_each_cpu(cpu, tmpmsk) { + struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); - cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); dest = 0; - - /* Collect cpus in cluster. */ - for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { - if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); - } + for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); if (!dest) continue; __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* - * Cluster sibling cpus should be discared now so - * we would not send IPI them second time. - */ - cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); + /* Remove cluster CPUs from tmpmask */ + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } local_irq_restore(flags); @@ -105,125 +92,90 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int -x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 x2apic_calc_apicid(unsigned int cpu) { - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned int cpu; - u32 dest = 0; - u16 cluster; - - cpu = cpumask_first(mask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - - dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - cluster = x2apic_cluster(cpu); - - cpumask_clear(effmsk); - for_each_cpu(cpu, mask) { - if (cluster != x2apic_cluster(cpu)) - continue; - dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); - cpumask_set_cpu(cpu, effmsk); - } - - *apicid = dest; - return 0; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static void init_x2apic_ldr(void) { - unsigned int this_cpu = smp_processor_id(); + struct cluster_mask *cmsk = this_cpu_read(cluster_masks); + u32 cluster, apicid = apic_read(APIC_LDR); unsigned int cpu; - per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + this_cpu_write(x86_cpu_to_logical_apicid, apicid); + + if (cmsk) + goto update; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + cluster = apicid >> 16; for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + cmsk = per_cpu(cluster_masks, cpu); + /* Matching cluster found. Link and update it. */ + if (cmsk && cmsk->clusterid == cluster) + goto update; } + cmsk = cluster_hotplug_mask; + cluster_hotplug_mask = NULL; +update: + this_cpu_write(cluster_masks, cmsk); + cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -/* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int x2apic_prepare_cpu(unsigned int cpu) +static int alloc_clustermask(unsigned int cpu, int node) { - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) - return -ENOMEM; + if (per_cpu(cluster_masks, cpu)) + return 0; + /* + * If a hotplug spare mask exists, check whether it's on the right + * node. If not, free it and allocate a new one. + */ + if (cluster_hotplug_mask) { + if (cluster_hotplug_mask->node == node) + return 0; + kfree(cluster_hotplug_mask); + } - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); + cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), + GFP_KERNEL, node); + if (!cluster_hotplug_mask) return -ENOMEM; - } + cluster_hotplug_mask->node = node; + return 0; +} +static int x2apic_prepare_cpu(unsigned int cpu) +{ + if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + return -ENOMEM; + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + return -ENOMEM; return 0; } -static int x2apic_dead_cpu(unsigned int this_cpu) +static int x2apic_dead_cpu(unsigned int dead_cpu) { - int cpu; + struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + cpumask_clear_cpu(dead_cpu, &cmsk->mask); + free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } static int x2apic_cluster_probe(void) { - int cpu = smp_processor_id(); - int ret; - if (!x2apic_mode) return 0; - ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", - x2apic_prepare_cpu, x2apic_dead_cpu); - if (ret < 0) { + if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", + x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); return 0; } - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + init_x2apic_ldr(); return 1; } -static const struct cpumask *x2apic_cluster_target_cpus(void) -{ - return cpu_all_mask; -} - -/* - * Each x2apic cluster is an allocation domain. - */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* - * To minimize vector pressure, default case of boot, device bringup - * etc will use a single cpu for the interrupt destination. - * - * On explicit migration requests coming from irqbalance etc, - * interrupts will be routed to the x2apic cluster (cluster-id - * derived from the first cpu in the mask) members specified - * in the mask. - */ - if (mask == x2apic_cluster_target_cpus()) - cpumask_copy(retmask, cpumask_of(cpu)); - else - cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); -} - static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", @@ -235,12 +187,10 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -253,7 +203,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .calc_dest_apicid = x2apic_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index b94d35320f85..f8d9d69994e6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -7,7 +7,8 @@ #include <linux/dmar.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include <asm/ipi.h> +#include "x2apic.h" int x2apic_phys; @@ -99,6 +100,43 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } +/* Common x2apic functions, also used by x2apic_cluster */ +int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + +int x2apic_apic_id_registered(void) +{ + return 1; +} + +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned long cfg = __prepare_ICR(0, vector, dest); + native_x2apic_icr_write(cfg, apicid); +} + +unsigned int x2apic_get_apic_id(unsigned long id) +{ + return id; +} + +u32 x2apic_set_apic_id(unsigned int id) +{ + return id; +} + +int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +{ + return initial_apicid >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -110,12 +148,10 @@ static struct apic apic_x2apic_phys __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -128,7 +164,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0d57bb9079c9..5832df6d9c37 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -525,16 +525,9 @@ static void uv_init_apic_ldr(void) { } -static int -uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 apic_uv_calc_apicid(unsigned int cpu) { - int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); - - if (!ret) - *apicid |= uv_apicid_hibits; - - return ret; + return apic_default_calc_apicid(cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -547,7 +540,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) return id; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { /* CHECKME: Do we need to mask out the xapic extra bits? */ return id; @@ -584,12 +577,10 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* Physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -602,7 +593,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .calc_dest_apicid = apic_uv_calc_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8f5cb2c7060c..86c4439f9d74 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); + lapic_assign_legacy_vector(irq, true); } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..723fa9782186 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -225,7 +225,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy idt_init_desc(&desc, t); write_idt_entry(idt, t->vector, &desc); if (sys) - set_bit(t->vector, used_vectors); + set_bit(t->vector, system_vectors); } } @@ -313,14 +313,14 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC - set_bit(i, used_vectors); + set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); #else entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -358,7 +358,7 @@ void idt_invalidate(void *addr) void __init update_intr_gate(unsigned int n, const void *addr) { - if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + if (WARN_ON_ONCE(!test_bit(n, system_vectors))) return; set_intr_gate(n, addr); } @@ -366,6 +366,6 @@ void __init update_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { BUG_ON(n < FIRST_SYSTEM_VECTOR); - if (!test_and_set_bit(n, used_vectors)) + if (!test_and_set_bit(n, system_vectors)) set_intr_gate(n, addr); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 52089c043160..49cfd9fe7589 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -333,105 +333,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU - -/* These two declarations are only used in check_irq_vectors_for_cpu_disable() - * below, which is protected by stop_machine(). Putting them on the stack - * results in a stack frame overflow. Dynamically allocating could result in a - * failure so declare these two cpumasks as global. - */ -static struct cpumask affinity_new, online_new; - -/* - * This cpu is going to be removed and its vectors migrated to the remaining - * online cpus. Check to see if there are enough vectors in the remaining cpus. - * This function is protected by stop_machine(). - */ -int check_irq_vectors_for_cpu_disable(void) -{ - unsigned int this_cpu, vector, this_count, count; - struct irq_desc *desc; - struct irq_data *data; - int cpu; - - this_cpu = smp_processor_id(); - cpumask_copy(&online_new, cpu_online_mask); - cpumask_clear_cpu(this_cpu, &online_new); - - this_count = 0; - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - /* - * Protect against concurrent action removal, affinity - * changes etc. - */ - raw_spin_lock(&desc->lock); - data = irq_desc_get_irq_data(desc); - cpumask_copy(&affinity_new, - irq_data_get_affinity_mask(data)); - cpumask_clear_cpu(this_cpu, &affinity_new); - - /* Do not count inactive or per-cpu irqs. */ - if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { - raw_spin_unlock(&desc->lock); - continue; - } - - raw_spin_unlock(&desc->lock); - /* - * A single irq may be mapped to multiple cpu's - * vector_irq[] (for example IOAPIC cluster mode). In - * this case we have two possibilities: - * - * 1) the resulting affinity mask is empty; that is - * this the down'd cpu is the last cpu in the irq's - * affinity mask, or - * - * 2) the resulting affinity mask is no longer a - * subset of the online cpus but the affinity mask is - * not zero; that is the down'd cpu is the last online - * cpu in a user set affinity mask. - */ - if (cpumask_empty(&affinity_new) || - !cpumask_subset(&affinity_new, &online_new)) - this_count++; - } - /* No need to check any further. */ - if (!this_count) - return 0; - - count = 0; - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We scan from FIRST_EXTERNAL_VECTOR to first system - * vector. If the vector is marked in the used vectors - * bitmap or an irq is assigned to it, we don't count - * it as available. - * - * As this is an inaccurate snapshot anyway, we can do - * this w/o holding vector_lock. - */ - for (vector = FIRST_EXTERNAL_VECTOR; - vector < FIRST_SYSTEM_VECTOR; vector++) { - if (!test_bit(vector, used_vectors) && - IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { - if (++count == this_count) - return 0; - } - } - } - - if (count < this_count) { - pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", - this_cpu, this_count, count); - return -ERANGE; - } - return 0; -} - /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1e4094eba15e..8da3e909e967 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,9 +61,6 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) @@ -94,6 +91,7 @@ void __init native_init_IRQ(void) x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); + lapic_assign_system_vectors(); if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0957dd73d127..82559867e0a9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,18 +136,6 @@ RESERVE_BRK(dmi_alloc, 65536); static __initdata unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#endif - struct boot_params boot_params; /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ad59edd84de7..92aadfa30d61 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -254,14 +254,14 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with - * vector_lock held to prevent a concurrent setup/teardown - * from seeing a half valid vector space. + * Lock vector_lock, set CPU online and bring the vector + * allocator online. Online must be set with vector_lock held + * to prevent a concurrent irq setup/teardown from seeing a + * half valid vector space. */ lock_vector_lock(); - setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); + lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); @@ -1190,17 +1190,10 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_core_cpumask(0)); } -enum { - SMP_OK, - SMP_NO_CONFIG, - SMP_NO_APIC, - SMP_FORCE_UP, -}; - /* * Various sanity checks. */ -static int __init smp_sanity_check(unsigned max_cpus) +static void __init smp_sanity_check(void) { preempt_disable(); @@ -1238,16 +1231,6 @@ static int __init smp_sanity_check(unsigned max_cpus) } /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - preempt_enable(); - pr_notice("SMP motherboard not detected\n"); - return SMP_NO_CONFIG; - } - - /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ @@ -1257,29 +1240,6 @@ static int __init smp_sanity_check(unsigned max_cpus) physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(boot_cpu_apic_version) && - !boot_cpu_has(X86_FEATURE_APIC)) { - if (!disable_apic) { - pr_err("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); - } - return SMP_NO_APIC; - } - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - pr_info("SMP mode deactivated\n"); - return SMP_FORCE_UP; - } - - return SMP_OK; } static void __init smp_cpu_index_default(void) @@ -1294,9 +1254,18 @@ static void __init smp_cpu_index_default(void) } } +static void __init smp_get_logical_apicid(void) +{ + if (x2apic_mode) + cpu0_logical_apicid = apic_read(APIC_LDR); + else + cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); +} + /* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter + * for common interface support. */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { @@ -1328,31 +1297,27 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); - switch (smp_sanity_check(max_cpus)) { - case SMP_NO_CONFIG: - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); - return; - case SMP_NO_APIC: + smp_sanity_check(); + + switch (apic_intr_mode) { + case APIC_PIC: + case APIC_VIRTUAL_WIRE_NO_CONFIG: disable_smp(); return; - case SMP_FORCE_UP: + case APIC_SYMMETRIC_IO_NO_ROUTING: disable_smp(); - apic_bsp_setup(false); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); return; - case SMP_OK: + case APIC_VIRTUAL_WIRE: + case APIC_SYMMETRIC_IO: break; } - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); - default_setup_apic_routing(); - cpu0_logical_apicid = apic_bsp_setup(false); + smp_get_logical_apicid(); pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); @@ -1395,7 +1360,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) nmi_selftest(); impress_friends(); - setup_ioapic_dest(); mtrr_aps_init(); } @@ -1554,13 +1518,14 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + lapic_offline(); } int native_cpu_disable(void) { int ret; - ret = check_irq_vectors_for_cpu_disable(); + ret = lapic_can_unplug_cpu(); if (ret) return ret; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 879af864d99a..749d189f8cd4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -85,6 +85,11 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { x86_init.timers.timer_init(); + /* + * After PIT/HPET timers init, select and setup + * the final interrupt mode for delivering IRQs. + */ + x86_init.irqs.intr_mode_init(); tsc_init(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 67db4f43309e..a5791f3d3f84 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -71,7 +71,7 @@ #include <asm/proto.h> #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); +DECLARE_BITMAP(system_vectors, NR_VECTORS); static inline void cond_local_irq_enable(struct pt_regs *regs) { diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index b034b1b14b9c..44685fb2a192 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -26,9 +26,6 @@ #define TOPOLOGY_REGISTER_OFFSET 0x10 -/* Flag below is initialized once during vSMP PCI initialization. */ -static int irq_routing_comply = 1; - #if defined CONFIG_PCI && defined CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: @@ -105,9 +102,6 @@ static void __init set_vsmp_pv_ops(void) if (cap & ctl & BIT(8)) { ctl &= ~BIT(8); - /* Interrupt routing set to ignore */ - irq_routing_comply = 0; - #ifdef CONFIG_PROC_FS /* Don't let users change irq affinity via procfs */ no_irq_affinity = 1; @@ -211,23 +205,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) return hard_smp_processor_id() >> index_msb; } -/* - * In vSMP, all cpus should be capable of handling interrupts, regardless of - * the APIC used. - */ -static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - cpumask_setall(retmask); -} - static void vsmp_apic_post_init(void) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; - - if (!irq_routing_comply) - apic->vector_allocation_domain = fill_vector_allocation_domain; } void __init vsmp_init(void) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a088b2c47f73..a7889b93e438 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -55,6 +55,7 @@ struct x86_init_ops x86_init __initdata = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .trap_init = x86_init_noop, + .intr_mode_init = apic_intr_mode_init }, .oem = { diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 03fc397335b7..5f6fd860820a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -127,10 +127,11 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. */ -static void uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int uv_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); + return 0; } /* diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 30434b8708f2..6b830d4cb4c8 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } -static unsigned long xen_set_apic_id(unsigned int x) +static u32 xen_set_apic_id(unsigned int x) { WARN_ON(1); return x; @@ -161,12 +161,10 @@ static struct apic xen_pv_apic = { /* .irq_delivery_mode - used in native_compose_msi_msg only */ /* .irq_dest_mode - used in native_compose_msi_msg only */ - .target_cpus = default_target_cpus, .disable_esr = 0, /* .dest_logical - default_send_IPI_ use it but we use our own. */ .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ @@ -179,7 +177,7 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .calc_dest_apicid = apic_flat_calc_apicid, #ifdef CONFIG_SMP .send_IPI_mask = xen_send_IPI_mask, diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d4396e27b1fb..7b3b17f2ab50 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1231,6 +1231,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 033258634b8c..b5843fe6a44d 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -140,8 +140,9 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) return irq_create_fwspec_mapping(&fwspec); } -static void xgene_gpio_sb_domain_activate(struct irq_domain *d, - struct irq_data *irq_data) +static int xgene_gpio_sb_domain_activate(struct irq_domain *d, + struct irq_data *irq_data, + bool early) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); @@ -150,11 +151,12 @@ static void xgene_gpio_sb_domain_activate(struct irq_domain *d, dev_err(priv->gc.parent, "Unable to configure XGene GPIO standby pin %d as IRQ\n", gpio); - return; + return -ENOSPC; } xgene_gpio_set_bit(&priv->gc, priv->regs + MPA_GPIO_SEL_LO, gpio * 2, 1); + return 0; } static void xgene_gpio_sb_domain_deactivate(struct irq_domain *d, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8e8874d23717..9c848e36f209 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4173,16 +4173,26 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_common(domain, virq, nr_irqs); } -static void irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, + struct amd_ir_data *ir_data, + struct irq_2_irte *irte_info, + struct irq_cfg *cfg); + +static int irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct irq_cfg *cfg = irqd_cfg(irq_data); - if (iommu) - iommu->irte_ops->activate(data->entry, irte_info->devid, - irte_info->index); + if (!iommu) + return 0; + + iommu->irte_ops->activate(data->entry, irte_info->devid, + irte_info->index); + amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg); + return 0; } static void irq_remapping_deactivate(struct irq_domain *domain, @@ -4269,6 +4279,22 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data); } + +static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, + struct amd_ir_data *ir_data, + struct irq_2_irte *irte_info, + struct irq_cfg *cfg) +{ + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + irte_info->index, cfg->vector, + cfg->dest_apicid); +} + static int amd_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -4286,13 +4312,7 @@ static int amd_ir_set_affinity(struct irq_data *data, if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, - irte_info->index, cfg->vector, cfg->dest_apicid); - + amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg); /* * After this point, all the interrupts will start arriving * at the new destination. So, time to cleanup the previous diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 25842b566c39..76a193c7fcfc 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1122,6 +1122,24 @@ struct irq_remap_ops intel_irq_remap_ops = { .get_irq_domain = intel_get_irq_domain, }; +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + struct irte *irte = &ir_data->irte_entry; + struct irq_cfg *cfg = irqd_cfg(irqd); + + /* + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. + */ + irte->vector = cfg->vector; + irte->dest_id = IRTE_DEST(cfg->dest_apicid); + + /* Update the hardware only if the interrupt is in remapped mode. */ + if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) + modify_irte(&ir_data->irq_2_iommu, irte); +} + /* * Migrate the IO-APIC irq in the presence of intr-remapping. * @@ -1140,27 +1158,15 @@ static int intel_ir_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct intel_ir_data *ir_data = data->chip_data; - struct irte *irte = &ir_data->irte_entry; - struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) return ret; - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - irte->vector = cfg->vector; - irte->dest_id = IRTE_DEST(cfg->dest_apicid); - - /* Update the hardware only if the interrupt is in remapped mode. */ - if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); - + intel_ir_reconfigure_irte(data, false); /* * After this point, all the interrupts will start arriving * at the new destination. So, time to cleanup the previous @@ -1390,12 +1396,11 @@ static void intel_irq_remapping_free(struct irq_domain *domain, irq_domain_free_irqs_common(domain, virq, nr_irqs); } -static void intel_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int intel_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { - struct intel_ir_data *data = irq_data->chip_data; - - modify_irte(&data->irq_2_iommu, &data->irte_entry); + intel_ir_reconfigure_irte(irq_data, true); + return 0; } static void intel_irq_remapping_deactivate(struct irq_domain *domain, diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e88395605e32..e2339af8054c 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2209,8 +2209,8 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, return 0; } -static void its_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d) +static int its_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d, bool early) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); @@ -2228,6 +2228,7 @@ static void its_irq_domain_activate(struct irq_domain *domain, /* Map the GIC IRQ and event to the device */ its_send_mapti(its_dev, d->hwirq, event); + return 0; } static void its_irq_domain_deactivate(struct irq_domain *domain, @@ -2701,8 +2702,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq return err; } -static void its_vpe_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d) +static int its_vpe_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d, bool early) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); @@ -2710,6 +2711,7 @@ static void its_vpe_irq_domain_activate(struct irq_domain *domain, vpe->col_idx = cpumask_first(cpu_online_mask); its_send_vmapp(vpe, true); its_send_vinvall(vpe); + return 0; } static void its_vpe_irq_domain_deactivate(struct irq_domain *domain, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 496ed9130600..e06607167858 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1441,6 +1441,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, pci_msi_domain_update_chip_ops(info); info->flags |= MSI_FLAG_ACTIVATE_EARLY; + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + info->flags |= MSI_FLAG_MUST_REACTIVATE; domain = msi_create_irq_domain(fwnode, info, parent); if (!domain) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 50299ad96659..02b66588cac6 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -289,13 +289,14 @@ static int stm32_gpio_domain_translate(struct irq_domain *d, return 0; } -static void stm32_gpio_domain_activate(struct irq_domain *d, - struct irq_data *irq_data) +static int stm32_gpio_domain_activate(struct irq_domain *d, + struct irq_data *irq_data, bool early) { struct stm32_gpio_bank *bank = d->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); regmap_field_write(pctl->irqmux[irq_data->hwirq], bank->bank_nr); + return 0; } static int stm32_gpio_domain_alloc(struct irq_domain *d, diff --git a/include/linux/irq.h b/include/linux/irq.h index 4536286cc4d2..b01d06db9101 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1114,6 +1114,28 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, return readl(gc->reg_base + reg_offset); } +struct irq_matrix; +struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, + unsigned int alloc_start, + unsigned int alloc_end); +void irq_matrix_online(struct irq_matrix *m); +void irq_matrix_offline(struct irq_matrix *m); +void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace); +int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk); +void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk); +int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu); +void irq_matrix_reserve(struct irq_matrix *m); +void irq_matrix_remove_reserved(struct irq_matrix *m); +int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, + bool reserved, unsigned int *mapped_cpu); +void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, + unsigned int bit, bool managed); +void irq_matrix_assign(struct irq_matrix *m, unsigned int bit); +unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown); +unsigned int irq_matrix_allocated(struct irq_matrix *m); +unsigned int irq_matrix_reserved(struct irq_matrix *m); +void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind); + /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ #define INVALID_HWIRQ (~0UL) irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index b6084898d330..60e3100b0809 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -94,6 +94,7 @@ struct irq_desc { #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS struct dentry *debugfs_file; + const char *dev_name; #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b1037dfc47e4..0d6f05cab0fe 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -41,6 +41,7 @@ struct of_device_id; struct irq_chip; struct irq_data; struct cpumask; +struct seq_file; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 @@ -105,18 +106,21 @@ struct irq_domain_ops { int (*xlate)(struct irq_domain *d, struct device_node *node, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type); - #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ int (*alloc)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - void (*activate)(struct irq_domain *d, struct irq_data *irq_data); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + void (*debug_show)(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind); +#endif }; extern struct irq_domain_ops irq_generic_chip_ops; @@ -438,7 +442,7 @@ extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); -extern void irq_domain_activate_irq(struct irq_data *irq_data); +extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, @@ -508,8 +512,6 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -static inline void irq_domain_activate_irq(struct irq_data *data) { } -static inline void irq_domain_deactivate_irq(struct irq_data *data) { } static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { @@ -558,8 +560,6 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } -static inline void irq_domain_activate_irq(struct irq_data *data) { } -static inline void irq_domain_deactivate_irq(struct irq_data *data) { } static inline struct irq_domain *irq_find_matching_fwnode( struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { diff --git a/include/linux/msi.h b/include/linux/msi.h index cdd069cf9ed8..1f1bbb5b4679 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -284,6 +284,11 @@ enum { MSI_FLAG_PCI_MSIX = (1 << 3), /* Needs early activate, required for PCI */ MSI_FLAG_ACTIVATE_EARLY = (1 << 4), + /* + * Must reactivate when irq is started even when + * MSI_FLAG_ACTIVATE_EARLY has been set. + */ + MSI_FLAG_MUST_REACTIVATE = (1 << 5), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, diff --git a/include/trace/events/irq_matrix.h b/include/trace/events/irq_matrix.h new file mode 100644 index 000000000000..267d4cbbf360 --- /dev/null +++ b/include/trace/events/irq_matrix.h @@ -0,0 +1,201 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq_matrix + +#if !defined(_TRACE_IRQ_MATRIX_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_MATRIX_H + +#include <linux/tracepoint.h> + +struct irq_matrix; +struct cpumap; + +DECLARE_EVENT_CLASS(irq_matrix_global, + + TP_PROTO(struct irq_matrix *matrix), + + TP_ARGS(matrix), + + TP_STRUCT__entry( + __field( unsigned int, online_maps ) + __field( unsigned int, global_available ) + __field( unsigned int, global_reserved ) + __field( unsigned int, total_allocated ) + ), + + TP_fast_assign( + __entry->online_maps = matrix->online_maps; + __entry->global_available = matrix->global_available; + __entry->global_reserved = matrix->global_reserved; + __entry->total_allocated = matrix->total_allocated; + ), + + TP_printk("online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", + __entry->online_maps, __entry->global_available, + __entry->global_reserved, __entry->total_allocated) +); + +DECLARE_EVENT_CLASS(irq_matrix_global_update, + + TP_PROTO(int bit, struct irq_matrix *matrix), + + TP_ARGS(bit, matrix), + + TP_STRUCT__entry( + __field( int, bit ) + __field( unsigned int, online_maps ) + __field( unsigned int, global_available ) + __field( unsigned int, global_reserved ) + __field( unsigned int, total_allocated ) + ), + + TP_fast_assign( + __entry->bit = bit; + __entry->online_maps = matrix->online_maps; + __entry->global_available = matrix->global_available; + __entry->global_reserved = matrix->global_reserved; + __entry->total_allocated = matrix->total_allocated; + ), + + TP_printk("bit=%d online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", + __entry->bit, __entry->online_maps, + __entry->global_available, __entry->global_reserved, + __entry->total_allocated) +); + +DECLARE_EVENT_CLASS(irq_matrix_cpu, + + TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, + struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap), + + TP_STRUCT__entry( + __field( int, bit ) + __field( unsigned int, cpu ) + __field( bool, online ) + __field( unsigned int, available ) + __field( unsigned int, allocated ) + __field( unsigned int, managed ) + __field( unsigned int, online_maps ) + __field( unsigned int, global_available ) + __field( unsigned int, global_reserved ) + __field( unsigned int, total_allocated ) + ), + + TP_fast_assign( + __entry->bit = bit; + __entry->cpu = cpu; + __entry->online = cmap->online; + __entry->available = cmap->available; + __entry->allocated = cmap->allocated; + __entry->managed = cmap->managed; + __entry->online_maps = matrix->online_maps; + __entry->global_available = matrix->global_available; + __entry->global_reserved = matrix->global_reserved; + __entry->total_allocated = matrix->total_allocated; + ), + + TP_printk("bit=%d cpu=%u online=%d avl=%u alloc=%u managed=%u online_maps=%u global_avl=%u, global_rsvd=%u, total_alloc=%u", + __entry->bit, __entry->cpu, __entry->online, + __entry->available, __entry->allocated, + __entry->managed, __entry->online_maps, + __entry->global_available, __entry->global_reserved, + __entry->total_allocated) +); + +DEFINE_EVENT(irq_matrix_global, irq_matrix_online, + + TP_PROTO(struct irq_matrix *matrix), + + TP_ARGS(matrix) +); + +DEFINE_EVENT(irq_matrix_global, irq_matrix_offline, + + TP_PROTO(struct irq_matrix *matrix), + + TP_ARGS(matrix) +); + +DEFINE_EVENT(irq_matrix_global, irq_matrix_reserve, + + TP_PROTO(struct irq_matrix *matrix), + + TP_ARGS(matrix) +); + +DEFINE_EVENT(irq_matrix_global, irq_matrix_remove_reserved, + + TP_PROTO(struct irq_matrix *matrix), + + TP_ARGS(matrix) +); + +DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system, + + TP_PROTO(int bit, struct irq_matrix *matrix), + + TP_ARGS(bit, matrix) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_reserved, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_remove_managed, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_managed, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_assign, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + +DEFINE_EVENT(irq_matrix_cpu, irq_matrix_free, + + TP_PROTO(int bit, unsigned int cpu, + struct irq_matrix *matrix, struct cpumap *cmap), + + TP_ARGS(bit, cpu, matrix, cmap) +); + + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..2fb98a48b6ae 100644 --- a/init/main.c +++ b/init/main.c @@ -664,12 +664,12 @@ asmlinkage __visible void __init start_kernel(void) debug_objects_mem_init(); setup_per_cpu_pageset(); numa_policy_init(); + acpi_early_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); anon_vma_init(); - acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index a117adf7084b..89e355866450 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -97,6 +97,12 @@ config HANDLE_DOMAIN_IRQ config IRQ_TIMINGS bool +config GENERIC_IRQ_MATRIX_ALLOCATOR + bool + +config GENERIC_IRQ_RESERVATION_MODE + bool + config IRQ_DOMAIN_DEBUG bool "Expose hardware/virtual IRQ mapping via debugfs" depends on IRQ_DOMAIN && DEBUG_FS diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index ed15d142694b..ff6e352e3a6c 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o obj-$(CONFIG_SMP) += affinity.o obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o +obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index befa671fba64..4e8089b319ae 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -54,7 +54,7 @@ unsigned long probe_irq_on(void) if (desc->irq_data.chip->irq_set_type) desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE); - irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE); + irq_activate_and_startup(desc, IRQ_NORESEND); } raw_spin_unlock_irq(&desc->lock); } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 5a2ef92c2782..043bfc35b353 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -207,20 +207,24 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) * Catch code which fiddles with enable_irq() on a managed * and potentially shutdown IRQ. Chained interrupt * installment or irq auto probing should not happen on - * managed irqs either. Emit a warning, break the affinity - * and start it up as a normal interrupt. + * managed irqs either. */ if (WARN_ON_ONCE(force)) - return IRQ_STARTUP_NORMAL; + return IRQ_STARTUP_ABORT; /* * The interrupt was requested, but there is no online CPU * in it's affinity mask. Put it into managed shutdown * state and let the cpu hotplug mechanism start it up once * a CPU in the mask becomes available. */ - irqd_set_managed_shutdown(d); return IRQ_STARTUP_ABORT; } + /* + * Managed interrupts have reserved resources, so this should not + * happen. + */ + if (WARN_ON(irq_domain_activate_irq(d, false))) + return IRQ_STARTUP_ABORT; return IRQ_STARTUP_MANAGED; } #else @@ -236,7 +240,9 @@ static int __irq_startup(struct irq_desc *desc) struct irq_data *d = irq_desc_get_irq_data(desc); int ret = 0; - irq_domain_activate_irq(d); + /* Warn if this interrupt is not activated but try nevertheless */ + WARN_ON_ONCE(!irqd_is_activated(d)); + if (d->chip->irq_startup) { ret = d->chip->irq_startup(d); irq_state_clr_disabled(desc); @@ -269,6 +275,7 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) ret = __irq_startup(desc); break; case IRQ_STARTUP_ABORT: + irqd_set_managed_shutdown(d); return 0; } } @@ -278,6 +285,22 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) return ret; } +int irq_activate(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + if (!irqd_affinity_is_managed(d)) + return irq_domain_activate_irq(d, false); + return 0; +} + +void irq_activate_and_startup(struct irq_desc *desc, bool resend) +{ + if (WARN_ON(irq_activate(desc))) + return; + irq_startup(desc, resend, IRQ_START_FORCE); +} + static void __irq_disable(struct irq_desc *desc, bool mask); void irq_shutdown(struct irq_desc *desc) @@ -953,7 +976,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; - irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); + irq_activate_and_startup(desc, IRQ_RESEND); } } diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index c3fdb36dec30..7f608ac39653 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -81,6 +81,8 @@ irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind) data->domain ? data->domain->name : ""); seq_printf(m, "%*shwirq: 0x%lx\n", ind + 1, "", data->hwirq); irq_debug_show_chip(m, data, ind + 1); + if (data->domain && data->domain->ops && data->domain->ops->debug_show) + data->domain->ops->debug_show(m, NULL, data, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (!data->parent_data) return; @@ -149,6 +151,7 @@ static int irq_debug_show(struct seq_file *m, void *p) raw_spin_lock_irq(&desc->lock); data = irq_desc_get_irq_data(desc); seq_printf(m, "handler: %pf\n", desc->handle_irq); + seq_printf(m, "device: %s\n", desc->dev_name); seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors); irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states, ARRAY_SIZE(irqdesc_states)); @@ -226,6 +229,15 @@ static const struct file_operations dfs_irq_ops = { .release = single_release, }; +void irq_debugfs_copy_devname(int irq, struct device *dev) +{ + struct irq_desc *desc = irq_to_desc(irq); + const char *name = dev_name(dev); + + if (name) + desc->dev_name = kstrdup(name, GFP_KERNEL); +} + void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc) { char name [10]; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 44ed5f8c8759..07d08ca701ec 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -75,6 +75,8 @@ extern void __enable_irq(struct irq_desc *desc); #define IRQ_START_FORCE true #define IRQ_START_COND false +extern int irq_activate(struct irq_desc *desc); +extern void irq_activate_and_startup(struct irq_desc *desc, bool resend); extern int irq_startup(struct irq_desc *desc, bool resend, bool force); extern void irq_shutdown(struct irq_desc *desc); @@ -437,6 +439,18 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) } #endif /* !CONFIG_GENERIC_PENDING_IRQ */ +#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) +static inline int irq_domain_activate_irq(struct irq_data *data, bool early) +{ + irqd_set_activated(data); + return 0; +} +static inline void irq_domain_deactivate_irq(struct irq_data *data) +{ + irqd_clr_activated(data); +} +#endif + #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include <linux/debugfs.h> @@ -444,7 +458,9 @@ void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); static inline void irq_remove_debugfs_entry(struct irq_desc *desc) { debugfs_remove(desc->debugfs_file); + kfree(desc->dev_name); } +void irq_debugfs_copy_devname(int irq, struct device *dev); # ifdef CONFIG_IRQ_DOMAIN void irq_domain_debugfs_init(struct dentry *root); # else @@ -459,4 +475,7 @@ static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) static inline void irq_remove_debugfs_entry(struct irq_desc *d) { } +static inline void irq_debugfs_copy_devname(int irq, struct device *dev) +{ +} #endif /* CONFIG_GENERIC_IRQ_DEBUGFS */ diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 82afb7ed369f..982a3576fb01 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -448,7 +448,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, } } - flags = affinity ? IRQD_AFFINITY_MANAGED : 0; + flags = affinity ? IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN : 0; mask = NULL; for (i = 0; i < cnt; i++) { @@ -462,6 +462,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, goto err; irq_insert_desc(start + i, desc); irq_sysfs_add(start + i, desc); + irq_add_debugfs_entry(start + i, desc); } bitmap_set(allocated_irqs, start, cnt); return start; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index ac4644e92b49..8de94508251f 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1682,28 +1682,36 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); -static void __irq_domain_activate_irq(struct irq_data *irq_data) +static void __irq_domain_deactivate_irq(struct irq_data *irq_data) { if (irq_data && irq_data->domain) { struct irq_domain *domain = irq_data->domain; + if (domain->ops->deactivate) + domain->ops->deactivate(domain, irq_data); if (irq_data->parent_data) - __irq_domain_activate_irq(irq_data->parent_data); - if (domain->ops->activate) - domain->ops->activate(domain, irq_data); + __irq_domain_deactivate_irq(irq_data->parent_data); } } -static void __irq_domain_deactivate_irq(struct irq_data *irq_data) +static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) { - if (irq_data && irq_data->domain) { - struct irq_domain *domain = irq_data->domain; + int ret = 0; - if (domain->ops->deactivate) - domain->ops->deactivate(domain, irq_data); - if (irq_data->parent_data) - __irq_domain_deactivate_irq(irq_data->parent_data); + if (irqd && irqd->domain) { + struct irq_domain *domain = irqd->domain; + + if (irqd->parent_data) + ret = __irq_domain_activate_irq(irqd->parent_data, + early); + if (!ret && domain->ops->activate) { + ret = domain->ops->activate(domain, irqd, early); + /* Rollback in case of error */ + if (ret && irqd->parent_data) + __irq_domain_deactivate_irq(irqd->parent_data); + } } + return ret; } /** @@ -1714,12 +1722,15 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ -void irq_domain_activate_irq(struct irq_data *irq_data) +int irq_domain_activate_irq(struct irq_data *irq_data, bool early) { - if (!irqd_is_activated(irq_data)) { - __irq_domain_activate_irq(irq_data); + int ret = 0; + + if (!irqd_is_activated(irq_data)) + ret = __irq_domain_activate_irq(irq_data, early); + if (!ret) irqd_set_activated(irq_data); - } + return ret; } /** @@ -1810,6 +1821,8 @@ irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) d->revmap_size + d->revmap_direct_max_irq); seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); + if (d->ops && d->ops->debug_show) + d->ops->debug_show(m, d, NULL, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (!d->parent) return; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 4bff6a10ae8e..24758ffd065d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -536,7 +536,7 @@ void __enable_irq(struct irq_desc *desc) * time. If it was already started up, then irq_startup() * will invoke irq_enable() under the hood. */ - irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); break; } default: @@ -1342,6 +1342,21 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) goto out_unlock; } + /* + * Activate the interrupt. That activation must happen + * independently of IRQ_NOAUTOEN. request_irq() can fail + * and the callers are supposed to handle + * that. enable_irq() of an interrupt requested with + * IRQ_NOAUTOEN is not supposed to fail. The activation + * keeps it in shutdown mode, it merily associates + * resources if necessary and if that's not possible it + * fails. Interrupts which are in managed shutdown mode + * will simply ignore that activation request. + */ + ret = irq_activate(desc); + if (ret) + goto out_unlock; + desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ IRQS_ONESHOT | IRQS_WAITING); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); @@ -1417,7 +1432,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) wake_up_process(new->secondary->thread); register_irq_proc(irq, desc); - irq_add_debugfs_entry(irq, desc); new->dir = NULL; register_handler_proc(irq, new); return 0; diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c new file mode 100644 index 000000000000..a3cbbc8191c5 --- /dev/null +++ b/kernel/irq/matrix.c @@ -0,0 +1,443 @@ +/* + * Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de> + * + * SPDX-License-Identifier: GPL-2.0 + */ +#include <linux/spinlock.h> +#include <linux/seq_file.h> +#include <linux/bitmap.h> +#include <linux/percpu.h> +#include <linux/cpu.h> +#include <linux/irq.h> + +#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS) * sizeof(unsigned long)) + +struct cpumap { + unsigned int available; + unsigned int allocated; + unsigned int managed; + bool online; + unsigned long alloc_map[IRQ_MATRIX_SIZE]; + unsigned long managed_map[IRQ_MATRIX_SIZE]; +}; + +struct irq_matrix { + unsigned int matrix_bits; + unsigned int alloc_start; + unsigned int alloc_end; + unsigned int alloc_size; + unsigned int global_available; + unsigned int global_reserved; + unsigned int systembits_inalloc; + unsigned int total_allocated; + unsigned int online_maps; + struct cpumap __percpu *maps; + unsigned long scratch_map[IRQ_MATRIX_SIZE]; + unsigned long system_map[IRQ_MATRIX_SIZE]; +}; + +#define CREATE_TRACE_POINTS +#include <trace/events/irq_matrix.h> + +/** + * irq_alloc_matrix - Allocate a irq_matrix structure and initialize it + * @matrix_bits: Number of matrix bits must be <= IRQ_MATRIX_BITS + * @alloc_start: From which bit the allocation search starts + * @alloc_end: At which bit the allocation search ends, i.e first + * invalid bit + */ +__init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, + unsigned int alloc_start, + unsigned int alloc_end) +{ + struct irq_matrix *m; + + if (matrix_bits > IRQ_MATRIX_BITS) + return NULL; + + m = kzalloc(sizeof(*m), GFP_KERNEL); + if (!m) + return NULL; + + m->matrix_bits = matrix_bits; + m->alloc_start = alloc_start; + m->alloc_end = alloc_end; + m->alloc_size = alloc_end - alloc_start; + m->maps = alloc_percpu(*m->maps); + if (!m->maps) { + kfree(m); + return NULL; + } + return m; +} + +/** + * irq_matrix_online - Bring the local CPU matrix online + * @m: Matrix pointer + */ +void irq_matrix_online(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + BUG_ON(cm->online); + + bitmap_zero(cm->alloc_map, m->matrix_bits); + cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc); + cm->allocated = 0; + m->global_available += cm->available; + cm->online = true; + m->online_maps++; + trace_irq_matrix_online(m); +} + +/** + * irq_matrix_offline - Bring the local CPU matrix offline + * @m: Matrix pointer + */ +void irq_matrix_offline(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + /* Update the global available size */ + m->global_available -= cm->available; + cm->online = false; + m->online_maps--; + trace_irq_matrix_offline(m); +} + +static unsigned int matrix_alloc_area(struct irq_matrix *m, struct cpumap *cm, + unsigned int num, bool managed) +{ + unsigned int area, start = m->alloc_start; + unsigned int end = m->alloc_end; + + bitmap_or(m->scratch_map, cm->managed_map, m->system_map, end); + bitmap_or(m->scratch_map, m->scratch_map, cm->alloc_map, end); + area = bitmap_find_next_zero_area(m->scratch_map, end, start, num, 0); + if (area >= end) + return area; + if (managed) + bitmap_set(cm->managed_map, area, num); + else + bitmap_set(cm->alloc_map, area, num); + return area; +} + +/** + * irq_matrix_assign_system - Assign system wide entry in the matrix + * @m: Matrix pointer + * @bit: Which bit to reserve + * @replace: Replace an already allocated vector with a system + * vector at the same bit position. + * + * The BUG_ON()s below are on purpose. If this goes wrong in the + * early boot process, then the chance to survive is about zero. + * If this happens when the system is life, it's not much better. + */ +void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, + bool replace) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + BUG_ON(bit > m->matrix_bits); + BUG_ON(m->online_maps > 1 || (m->online_maps && !replace)); + + set_bit(bit, m->system_map); + if (replace) { + BUG_ON(!test_and_clear_bit(bit, cm->alloc_map)); + cm->allocated--; + m->total_allocated--; + } + if (bit >= m->alloc_start && bit < m->alloc_end) + m->systembits_inalloc++; + + trace_irq_matrix_assign_system(bit, m); +} + +/** + * irq_matrix_reserve_managed - Reserve a managed interrupt in a CPU map + * @m: Matrix pointer + * @msk: On which CPUs the bits should be reserved. + * + * Can be called for offline CPUs. Note, this will only reserve one bit + * on all CPUs in @msk, but it's not guaranteed that the bits are at the + * same offset on all CPUs + */ +int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk) +{ + unsigned int cpu, failed_cpu; + + for_each_cpu(cpu, msk) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit; + + bit = matrix_alloc_area(m, cm, 1, true); + if (bit >= m->alloc_end) + goto cleanup; + cm->managed++; + if (cm->online) { + cm->available--; + m->global_available--; + } + trace_irq_matrix_reserve_managed(bit, cpu, m, cm); + } + return 0; +cleanup: + failed_cpu = cpu; + for_each_cpu(cpu, msk) { + if (cpu == failed_cpu) + break; + irq_matrix_remove_managed(m, cpumask_of(cpu)); + } + return -ENOSPC; +} + +/** + * irq_matrix_remove_managed - Remove managed interrupts in a CPU map + * @m: Matrix pointer + * @msk: On which CPUs the bits should be removed + * + * Can be called for offline CPUs + * + * This removes not allocated managed interrupts from the map. It does + * not matter which one because the managed interrupts free their + * allocation when they shut down. If not, the accounting is screwed, + * but all what can be done at this point is warn about it. + */ +void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk) +{ + unsigned int cpu; + + for_each_cpu(cpu, msk) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit, end = m->alloc_end; + + if (WARN_ON_ONCE(!cm->managed)) + continue; + + /* Get managed bit which are not allocated */ + bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); + + bit = find_first_bit(m->scratch_map, end); + if (WARN_ON_ONCE(bit >= end)) + continue; + + clear_bit(bit, cm->managed_map); + + cm->managed--; + if (cm->online) { + cm->available++; + m->global_available++; + } + trace_irq_matrix_remove_managed(bit, cpu, m, cm); + } +} + +/** + * irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map + * @m: Matrix pointer + * @cpu: On which CPU the interrupt should be allocated + */ +int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu) +{ + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit, end = m->alloc_end; + + /* Get managed bit which are not allocated */ + bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end); + bit = find_first_bit(m->scratch_map, end); + if (bit >= end) + return -ENOSPC; + set_bit(bit, cm->alloc_map); + cm->allocated++; + m->total_allocated++; + trace_irq_matrix_alloc_managed(bit, cpu, m, cm); + return bit; +} + +/** + * irq_matrix_assign - Assign a preallocated interrupt in the local CPU map + * @m: Matrix pointer + * @bit: Which bit to mark + * + * This should only be used to mark preallocated vectors + */ +void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) + return; + if (WARN_ON_ONCE(test_and_set_bit(bit, cm->alloc_map))) + return; + cm->allocated++; + m->total_allocated++; + cm->available--; + m->global_available--; + trace_irq_matrix_assign(bit, smp_processor_id(), m, cm); +} + +/** + * irq_matrix_reserve - Reserve interrupts + * @m: Matrix pointer + * + * This is merily a book keeping call. It increments the number of globally + * reserved interrupt bits w/o actually allocating them. This allows to + * setup interrupt descriptors w/o assigning low level resources to it. + * The actual allocation happens when the interrupt gets activated. + */ +void irq_matrix_reserve(struct irq_matrix *m) +{ + if (m->global_reserved <= m->global_available && + m->global_reserved + 1 > m->global_available) + pr_warn("Interrupt reservation exceeds available resources\n"); + + m->global_reserved++; + trace_irq_matrix_reserve(m); +} + +/** + * irq_matrix_remove_reserved - Remove interrupt reservation + * @m: Matrix pointer + * + * This is merily a book keeping call. It decrements the number of globally + * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the + * interrupt was never in use and a real vector allocated, which undid the + * reservation. + */ +void irq_matrix_remove_reserved(struct irq_matrix *m) +{ + m->global_reserved--; + trace_irq_matrix_remove_reserved(m); +} + +/** + * irq_matrix_alloc - Allocate a regular interrupt in a CPU map + * @m: Matrix pointer + * @msk: Which CPUs to search in + * @reserved: Allocate previously reserved interrupts + * @mapped_cpu: Pointer to store the CPU for which the irq was allocated + */ +int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, + bool reserved, unsigned int *mapped_cpu) +{ + unsigned int cpu; + + for_each_cpu(cpu, msk) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + unsigned int bit; + + if (!cm->online) + continue; + + bit = matrix_alloc_area(m, cm, 1, false); + if (bit < m->alloc_end) { + cm->allocated++; + cm->available--; + m->total_allocated++; + m->global_available--; + if (reserved) + m->global_reserved--; + *mapped_cpu = cpu; + trace_irq_matrix_alloc(bit, cpu, m, cm); + return bit; + } + } + return -ENOSPC; +} + +/** + * irq_matrix_free - Free allocated interrupt in the matrix + * @m: Matrix pointer + * @cpu: Which CPU map needs be updated + * @bit: The bit to remove + * @managed: If true, the interrupt is managed and not accounted + * as available. + */ +void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, + unsigned int bit, bool managed) +{ + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) + return; + + if (cm->online) { + clear_bit(bit, cm->alloc_map); + cm->allocated--; + m->total_allocated--; + if (!managed) { + cm->available++; + m->global_available++; + } + } + trace_irq_matrix_free(bit, cpu, m, cm); +} + +/** + * irq_matrix_available - Get the number of globally available irqs + * @m: Pointer to the matrix to query + * @cpudown: If true, the local CPU is about to go down, adjust + * the number of available irqs accordingly + */ +unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + return m->global_available - cpudown ? cm->available : 0; +} + +/** + * irq_matrix_reserved - Get the number of globally reserved irqs + * @m: Pointer to the matrix to query + */ +unsigned int irq_matrix_reserved(struct irq_matrix *m) +{ + return m->global_reserved; +} + +/** + * irq_matrix_allocated - Get the number of allocated irqs on the local cpu + * @m: Pointer to the matrix to search + * + * This returns number of allocated irqs + */ +unsigned int irq_matrix_allocated(struct irq_matrix *m) +{ + struct cpumap *cm = this_cpu_ptr(m->maps); + + return cm->allocated; +} + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +/** + * irq_matrix_debug_show - Show detailed allocation information + * @sf: Pointer to the seq_file to print to + * @m: Pointer to the matrix allocator + * @ind: Indentation for the print format + * + * Note, this is a lockless snapshot. + */ +void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind) +{ + unsigned int nsys = bitmap_weight(m->system_map, m->matrix_bits); + int cpu; + + seq_printf(sf, "Online bitmaps: %6u\n", m->online_maps); + seq_printf(sf, "Global available: %6u\n", m->global_available); + seq_printf(sf, "Global reserved: %6u\n", m->global_reserved); + seq_printf(sf, "Total allocated: %6u\n", m->total_allocated); + seq_printf(sf, "System: %u: %*pbl\n", nsys, m->matrix_bits, + m->system_map); + seq_printf(sf, "%*s| CPU | avl | man | act | vectors\n", ind, " "); + cpus_read_lock(); + for_each_online_cpu(cpu) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + seq_printf(sf, "%*s %4d %4u %4u %4u %*pbl\n", ind, " ", + cpu, cm->available, cm->managed, cm->allocated, + m->matrix_bits, cm->alloc_map); + } + cpus_read_unlock(); +} +#endif diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 3fa4bd59f569..edb987b2c58d 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -16,6 +16,8 @@ #include <linux/msi.h> #include <linux/slab.h> +#include "internals.h" + /** * alloc_msi_entry - Allocate an initialize msi_entry * @dev: Pointer to the device for which this is allocated @@ -100,13 +102,14 @@ int msi_domain_set_affinity(struct irq_data *irq_data, return ret; } -static void msi_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data) +static int msi_domain_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool early) { struct msi_msg msg; BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg)); irq_chip_write_msi_msg(irq_data, &msg); + return 0; } static void msi_domain_deactivate(struct irq_domain *domain, @@ -373,8 +376,10 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, return ret; } - for (i = 0; i < desc->nvec_used; i++) + for (i = 0; i < desc->nvec_used; i++) { irq_set_msi_desc_off(virq, i, desc); + irq_debugfs_copy_devname(virq + i, dev); + } } if (ops->msi_finish) @@ -396,11 +401,28 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, struct irq_data *irq_data; irq_data = irq_domain_get_irq_data(domain, desc->irq); - irq_domain_activate_irq(irq_data); + ret = irq_domain_activate_irq(irq_data, true); + if (ret) + goto cleanup; + if (info->flags & MSI_FLAG_MUST_REACTIVATE) + irqd_clr_activated(irq_data); } } - return 0; + +cleanup: + for_each_msi_entry(desc, dev) { + struct irq_data *irqd; + + if (desc->irq == virq) + break; + + irqd = irq_domain_get_irq_data(domain, desc->irq); + if (irqd_is_activated(irqd)) + irq_domain_deactivate_irq(irqd); + } + msi_domain_free_irqs(domain, dev); + return ret; } /** |