From 1897bdc4d33167e9036460631d1349e59d841f2d Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 13 Nov 2014 13:46:09 +1100
Subject: mmu_notifier: add mmu_notifier_invalidate_range()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This notifier closes an important gap in the current mmu_notifier implementation: the existing callbacks are called too early or too late to reliably manage a non-CPU TLB. Specifically, invalidate_range_start() is called when all pages are still mapped and invalidate_range_end() when all pages are unmapped and potentially freed.

This is fine when the users of the mmu_notifiers manage their own SoftTLB, like KVM does. When the TLB is managed in software it is easy to wipe out entries for a given range and to prevent new entries from being established until invalidate_range_end() is called.

But when the user of mmu_notifiers has to manage a hardware TLB it can still wipe out TLB entries in invalidate_range_start(), but it can't make sure that no new TLB entries in the given range are established between invalidate_range_start() and invalidate_range_end().

To avoid silent data corruption, the entries in the non-CPU TLB need to be flushed when the pages are unmapped (at this point in time no _new_ TLB entries can be established in the non-CPU TLB) but not yet freed (as the non-CPU TLB may still have _existing_ entries pointing to the pages about to be freed).

To fix this problem we need to catch the moment when the Linux VMM flushes remote TLBs (a non-CPU TLB is not very different from a remote CPU TLB), as this is the point in time when the pages are unmapped but _not_ yet freed. The mmu_notifier_invalidate_range() function aims to catch that moment.

IOMMU code will be one user of the notifier callback. Currently this is only the AMD IOMMUv2 driver, but its code is about to be generalized and converted to a generic IOMMU-API extension to fit the needs of similar functionality in other IOMMUs as well.

The current attempt in the AMD IOMMUv2 driver to work around the invalidate_range_start/end() shortcoming is to assign an empty page table to the non-CPU TLB between any invalidate_range_start/end calls. With the empty page table assigned, every page-table walk to re-fill the non-CPU TLB causes a page fault that is reported to the IOMMU driver via an interrupt, possibly causing interrupt storms.

The page-fault handler in the AMD IOMMUv2 driver doesn't handle the fault if an invalidate_range_start/end pair is active; it just reports back SUCCESS to the device and lets it re-fault the page. But existing hardware (newer Radeon GPUs) that makes use of this feature doesn't re-fault indefinitely: after a certain number of faults for the same address the device enters a failure state and needs to be reset.

To avoid the GPUs entering a failure state we need to get rid of the empty-page-table workaround and use the mmu_notifier_invalidate_range() function introduced with this patch.
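As an illustration of the intended usage: once the follow-on patches add the invalidate_range() callback to mmu_notifier_ops and wire up the calls in the VMM, a driver that manages a hardware TLB could hook it roughly as sketched below. The device handle my_dev and the flush primitive hw_iotlb_flush_range() are hypothetical placeholders, not existing kernel interfaces:

	#include <linux/mmu_notifier.h>

	/* Hypothetical device handle and IOTLB flush primitive. */
	struct my_device;
	extern struct my_device *my_dev;
	extern void hw_iotlb_flush_range(struct my_device *dev,
					 unsigned long start,
					 unsigned long end);

	/*
	 * Called while the pages are unmapped but not yet freed: no new
	 * walks can re-fill the IOTLB for this range anymore, and stale
	 * entries are gone before the pages can be reused.  Runs under
	 * the ptl spin-lock, so it must not sleep.
	 */
	static void my_invalidate_range(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long end)
	{
		hw_iotlb_flush_range(my_dev, start, end);
	}

	static const struct mmu_notifier_ops my_mn_ops = {
		.invalidate_range = my_invalidate_range,
	};

	static struct mmu_notifier my_mn = {
		.ops = &my_mn_ops,
	};

	/* Register against a process address space, e.g. at bind time. */
	static int my_bind_mm(struct mm_struct *mm)
	{
		return mmu_notifier_register(&my_mn, mm);
	}

Compared to the empty-page-table workaround, the device never sees spurious faults; the IOTLB is simply flushed at the same point where the CPU TLB is flushed.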
Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 88787bb4b3b9..17907908d1df 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -242,6 +242,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, __mmu_notifier_invalidate_range_end(mm, start, end); } +static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + static inline void mmu_notifier_mm_init(struct mm_struct *mm) { mm->mmu_notifier_mm = NULL; @@ -342,6 +347,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, { } +static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + static inline void mmu_notifier_mm_init(struct mm_struct *mm) { } -- cgit v1.2.3 From 34ee645e83b60ae3d5955f70ab9ab9a159136673 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 13 Nov 2014 13:46:09 +1100 Subject: mmu_notifier: call mmu_notifier_invalidate_range() from VMM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add calls to the new mmu_notifier_invalidate_range() function to all places in the VMM that need it. Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 41 +++++++++++++++++++++++++++++++++++++++++ kernel/events/uprobes.c | 2 +- mm/fremap.c | 2 +- mm/huge_memory.c | 9 +++++---- mm/hugetlb.c | 7 ++++++- mm/ksm.c | 4 ++-- mm/memory.c | 3 ++- mm/migrate.c | 3 ++- mm/rmap.c | 2 +- 9 files changed, 61 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 17907908d1df..966da2b4b803 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -284,6 +284,44 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __young; \ }) +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + unsigned long ___addr = __address & PAGE_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pte_t ___pte; \ + \ + ___pte = ptep_clear_flush(__vma, __address, __ptep); \ + mmu_notifier_invalidate_range(___mm, ___addr, \ + ___addr + PAGE_SIZE); \ + \ + ___pte; \ +}) + +#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pmd_t ___pmd; \ + \ + ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \ + mmu_notifier_invalidate_range(___mm, ___haddr, \ + ___haddr + HPAGE_PMD_SIZE); \ + \ + ___pmd; \ +}) + +#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ + pmd_t ___pmd; \ + \ + ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \ + mmu_notifier_invalidate_range(__mm, 
___haddr, \ + ___haddr + HPAGE_PMD_SIZE); \ + \ + ___pmd; \ +}) + /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -362,6 +400,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush +#define pmdp_clear_flush_notify pmdp_clear_flush +#define pmdp_get_and_clear_notify pmdp_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1d0af8a2c646..bc143cf56cab 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, } flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); + ptep_clear_flush_notify(vma, addr, ptep); set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); page_remove_rmap(page); diff --git a/mm/fremap.c b/mm/fremap.c index 72b8fa361433..9129013732d7 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, if (pte_present(pte)) { flush_cache_page(vma, addr, pte_pfn(pte)); - pte = ptep_clear_flush(vma, addr, ptep); + pte = ptep_clear_flush_notify(vma, addr, ptep); page = vm_normal_page(vma, addr, pte); if (page) { if (pte_dirty(pte)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index de984159cf0b..1d89526ed531 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1036,7 +1036,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, goto out_free_pages; VM_BUG_ON_PAGE(!PageHead(page), page); - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); @@ -1179,7 +1179,7 @@ alloc: pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); mem_cgroup_commit_charge(new_page, memcg, false); lru_cache_add_active_or_unevictable(new_page, vma); @@ -1512,7 +1512,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, pmd_t entry; ret = 1; if (!prot_numa) { - entry = pmdp_get_and_clear(mm, addr, pmd); + entry = pmdp_get_and_clear_notify(mm, addr, pmd); if (pmd_numa(entry)) entry = pmd_mknonnuma(entry); entry = pmd_modify(entry, newprot); @@ -1644,6 +1644,7 @@ static int __split_huge_page_splitting(struct page *page, * serialize against split_huge_page*. 
*/ pmdp_splitting_flush(vma, address, pmd); + ret = 1; spin_unlock(ptl); } @@ -2834,7 +2835,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pmd_t _pmd; int i; - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9fd722769927..2e6add04fa1b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2598,8 +2598,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } set_huge_pte_at(dst, addr, dst_pte, entry); } else { - if (cow) + if (cow) { huge_ptep_set_wrprotect(src, addr, src_pte); + mmu_notifier_invalidate_range(src, mmun_start, + mmun_end); + } entry = huge_ptep_get(src_pte); ptepage = pte_page(entry); get_page(ptepage); @@ -2899,6 +2902,7 @@ retry_avoidcopy: /* Break COW */ huge_ptep_clear_flush(vma, address, ptep); + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); set_huge_pte_at(mm, address, ptep, make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page); @@ -3374,6 +3378,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * and that page table be reused and filled with junk. */ flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); mmu_notifier_invalidate_range_end(mm, start, end); diff --git a/mm/ksm.c b/mm/ksm.c index 6b2e337bc03c..d247efab5073 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * this assure us that no O_DIRECT can happen after the check * or in the middle of the check. */ - entry = ptep_clear_flush(vma, addr, ptep); + entry = ptep_clear_flush_notify(vma, addr, ptep); /* * Check that no O_DIRECT or similar I/O is in progress on the * page @@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, page_add_anon_rmap(kpage, vma, addr); flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); + ptep_clear_flush_notify(vma, addr, ptep); set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); page_remove_rmap(page); diff --git a/mm/memory.c b/mm/memory.c index 3e503831e042..655fd3d34bb0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -238,6 +238,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { tlb->need_flush = 0; tlb_flush(tlb); + mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif @@ -2234,7 +2235,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. 
*/ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); page_add_new_anon_rmap(new_page, vma, address); mem_cgroup_commit_charge(new_page, memcg, false); lru_cache_add_active_or_unevictable(new_page, vma); diff --git a/mm/migrate.c b/mm/migrate.c index 01439953abf5..41945cb0ca38 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1854,7 +1854,7 @@ fail_putback: */ flush_cache_range(vma, mmun_start, mmun_end); page_add_anon_rmap(new_page, vma, mmun_start); - pmdp_clear_flush(vma, mmun_start, pmd); + pmdp_clear_flush_notify(vma, mmun_start, pmd); set_pmd_at(mm, mmun_start, pmd, entry); flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); @@ -1862,6 +1862,7 @@ fail_putback: if (page_count(page) != 2) { set_pmd_at(mm, mmun_start, pmd, orig_entry); flush_tlb_range(vma, mmun_start, mmun_end); + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); page_remove_rmap(new_page); goto fail_putback; diff --git a/mm/rmap.c b/mm/rmap.c index 19886fb2f13a..d3eb1e02d1c6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1378,7 +1378,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* If nonlinear, store the file page offset in the pte. */ if (page->index != linear_page_index(vma, address)) { -- cgit v1.2.3 From 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 13 Nov 2014 13:46:09 +1100 Subject: mmu_notifier: add the callback for mmu_notifier_invalidate_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the mmu_notifier_invalidate_range() calls are in place, add the callback to allow subsystems to register against it. Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 37 ++++++++++++++++++++++++++++++++----- mm/mmu_notifier.c | 25 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 966da2b4b803..94d19f64cecf 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -98,11 +98,11 @@ struct mmu_notifier_ops { /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_sem and/or the - * locks protecting the reverse maps are held. The subsystem - * must guarantee that no additional references are taken to - * the pages in the range established between the call to - * invalidate_range_start() and the matching call to - * invalidate_range_end(). + * locks protecting the reverse maps are held. If the subsystem + * can't guarantee that no additional references are taken to + * the pages in the range, it has to implement the + * invalidate_range() notifier to remove any references taken + * after invalidate_range_start(). * * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. 
Either way the @@ -144,6 +144,29 @@ struct mmu_notifier_ops { void (*invalidate_range_end)(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end); + + /* + * invalidate_range() is either called between + * invalidate_range_start() and invalidate_range_end() when the + * VM has to free pages that were unmapped, but before the + * pages are actually freed, or outside of _start()/_end() when + * a (remote) TLB flush is necessary. + * + * If invalidate_range() is used to manage a non-CPU TLB with + * shared page-tables, it is not necessary to implement the + * invalidate_range_start()/end() notifiers, as + * invalidate_range() already catches the points in time when an + * external TLB range needs to be flushed. + * + * The invalidate_range() function is called under the ptl + * spin-lock and not allowed to sleep. + * + * Note that this function might be called with just a sub-range + * of what was passed to invalidate_range_start()/end(), if + * called between those functions. + */ + void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long start, unsigned long end); }; /* @@ -190,6 +213,8 @@ extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end); static inline void mmu_notifier_release(struct mm_struct *mm) { @@ -245,6 +270,8 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) { + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range(mm, start, end); } static inline void mmu_notifier_mm_init(struct mm_struct *mm) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 2c8da9825fe3..3b9b3d0741b2 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -193,6 +193,16 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { + /* + * Call invalidate_range() here too, so that a subsystem that + * already implements invalidate_range() does not also have to + * register an invalidate_range_end() call-back. Usually a + * subsystem registers either invalidate_range_start()/end() or + * invalidate_range(), so this will be no additional overhead + * (besides the pointer check). 
+ */ + if (mn->ops->invalidate_range) + mn->ops->invalidate_range(mn, mm, start, end); if (mn->ops->invalidate_range_end) mn->ops->invalidate_range_end(mn, mm, start, end); } @@ -200,6 +210,21 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, } EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end); +void __mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + int id; + + id = srcu_read_lock(&srcu); + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range) + mn->ops->invalidate_range(mn, mm, start, end); + } + srcu_read_unlock(&srcu, id); +} +EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range); + static int do_mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm, int take_mmap_sem) -- cgit v1.2.3 From fd3cbdc0d1b5254a2e8793df58c409b469899a3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 10 Aug 2014 08:53:39 +0200 Subject: jump_label: Fix small typos in the documentation Was reading through the documentation of this code and noticed a few typos, missing commas, etc. Cc: Jason Baron Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Mel Gorman Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 784304b222b3..98f923b6a0ea 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -8,28 +8,28 @@ * Copyright (C) 2011-2012 Peter Zijlstra * * Jump labels provide an interface to generate dynamic branches using - * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_key_false(&key))" statement is a unconditional branch (which + * self-modifying code. Assuming toolchain and architecture support, the result + * of a "if (static_key_false(&key))" statement is an unconditional branch (which * defaults to false - and the true block is placed out of line). * * However at runtime we can change the branch target using * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key - * object and for as long as there are references all branches referring to + * object, and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the static_key_slow_{inc,dec}() functions + * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). - * OTOH, since the affected branches are unconditional their runtime overhead + * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * - * When the control is directly exposed to userspace it is prudent to delay the + * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. 
* - * Lacking toolchain and or architecture support, it falls back to a simple + * Lacking toolchain and/or architecture support, jump labels fall back to a simple * conditional branch. * * struct static_key my_key = STATIC_KEY_INIT_TRUE; @@ -43,8 +43,7 @@ * * Not initializing the key (static data is initialized to 0s anyway) is the * same as using STATIC_KEY_INIT_FALSE. - * -*/ + */ #include #include -- cgit v1.2.3 From fadfe7be6e50de7f03913833b33c56cd8fb66bac Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Aug 2014 14:33:02 +0200 Subject: perf: Add queued work to remove orphaned child events In cases when the owner task exits before the workload and the workload made some forks, all the events stay around until the last workload process exits. That's because each child event holds a parent reference. We want to release all child events once the parent is gone, because at that time there's no process to read them anyway, so they're just eating resources. This removal races with process exit, which removes all events, and with fork, which clones events. To stay clear of those two, add a work queue that removes orphaned child events from a context whenever such an event is detected. Use a delayed work queue (with delay == 1), because we queue this work from the perf scheduler callbacks; a normal work queue tries to wake up the workqueue process, which deadlocks on rq->lock in this place. Also prevent cloning of events from an abandoned parent event. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++ kernel/events/core.c | 87 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 707617a8c0f6..ef5b62bdb103 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -507,6 +508,9 @@ struct perf_event_context { int nr_cgroups; /* cgroup evts */ int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; + + struct delayed_work orphans_remove; + bool orphans_remove_sched; }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index bbb3ca22f07c..a25460559b4f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,8 @@ #include +static struct workqueue_struct *perf_wq; + struct remote_function_call { struct task_struct *p; int (*func)(void *info); @@ -1381,6 +1383,45 @@ out: perf_event__header_size(tmp); } +/* + * A user event whose owner task is gone. + */ +static bool is_orphaned_event(struct perf_event *event) +{ + return event && !is_kernel_event(event) && !event->owner; +} + +/* + * Event has a parent, but the parent's task has finished; the event is + * alive only because its children hold a reference. 
+ */ +static bool is_orphaned_child(struct perf_event *event) +{ + return is_orphaned_event(event->parent); +} + +static void orphans_remove_work(struct work_struct *work); + +static void schedule_orphans_remove(struct perf_event_context *ctx) +{ + if (!ctx->task || ctx->orphans_remove_sched || !perf_wq) + return; + + if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) { + get_ctx(ctx); + ctx->orphans_remove_sched = true; + } +} + +static int __init perf_workqueue_init(void) +{ + perf_wq = create_singlethread_workqueue("perf"); + WARN(!perf_wq, "failed to create perf workqueue\n"); + return perf_wq ? 0 : -1; +} + +core_initcall(perf_workqueue_init); + static inline int event_filter_match(struct perf_event *event) { @@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event, if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + perf_pmu_enable(event->pmu); } @@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event, if (event->attr.exclusive) cpuctx->exclusive = 1; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + out: perf_pmu_enable(event->pmu); @@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); + INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work); } static struct perf_event_context * @@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +/* + * Remove all orphaned events from the context. + */ +static void orphans_remove_work(struct work_struct *work) +{ + struct perf_event_context *ctx; + struct perf_event *event, *tmp; + + ctx = container_of(work, struct perf_event_context, + orphans_remove.work); + + mutex_lock(&ctx->mutex); + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) { + struct perf_event *parent_event = event->parent; + + if (!is_orphaned_child(event)) + continue; + + perf_remove_from_context(event, true); + + mutex_lock(&parent_event->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent_event->child_mutex); + + free_event(event); + put_event(parent_event); + } + + raw_spin_lock_irq(&ctx->lock); + ctx->orphans_remove_sched = false; + raw_spin_unlock_irq(&ctx->lock); + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; - if (!atomic_long_inc_not_zero(&parent_event->refcount)) { + if (is_orphaned_event(parent_event) || + !atomic_long_inc_not_zero(&parent_event->refcount)) { free_event(child_event); return NULL; } -- cgit v1.2.3 From 770eee1fd38c70a009b321f5dbe64358f42511fd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 11 Aug 2014 21:27:12 +0200 Subject: perf/x86: Fix data source encoding issues for load latency/precise store This patch fixes issues introduced by Andi's previous 'Revamp PEBS' patch series. This patch fixes the following: - precise_store_data_hsw() encodes the mem op type whenever we can - precise_store_data_hsw() sets the default data source correctly - 0 is not a valid init value for the data source; 
define PERF_MEM_NA as the default value instead. This bug was actually introduced by: commit 722e76e60f2775c21b087ff12c5e678cf0ebcaaf Author: Stephane Eranian Date: Thu May 15 17:56:44 2014 +0200 fix Haswell precise store data source encoding Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1407785233-32193-4-git-send-email-eranian@google.com Cc: Arnaldo Carvalho de Melo Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 +++++++---- include/linux/perf_event.h | 9 ++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a9b60f32064f..67919ce0f76a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -113,9 +113,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) union perf_mem_data_src dse; u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; - dse.val = 0; - dse.mem_op = PERF_MEM_OP_NA; - dse.mem_lvl = PERF_MEM_LVL_NA; + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; /* * L1 info only valid for following events: @@ -126,7 +129,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) * MEM_UOPS_RETIRED.ALL_STORES */ if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) - return dse.mem_lvl; + return dse.val; if (status & 1) dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ef5b62bdb103..f0a1036b1911 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -608,6 +608,13 @@ struct perf_sample_data { u64 txn; }; +/* default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA)) + static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { @@ -620,7 +627,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->regs_user.regs = NULL; data->stack_user_size = 0; data->weight = 0; - data->data_src.val = 0; + data->data_src.val = PERF_MEM_NA; data->txn = 0; } -- cgit v1.2.3 From 2e39465abc4b7856a0ea6fcf4f6b4668bb5db877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Aug 2014 12:07:15 +0200 Subject: locking: Remove deprecated smp_mb__() barriers It's been a while and there are no in-tree users left, so remove the deprecated barriers. Signed-off-by: Peter Zijlstra Cc: Chen, Gong Cc: Jacob Pan Cc: Joe Perches Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Srinivas Pandruvada Cc: Theodore Ts'o Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 36 ------------------------------------ include/linux/bitops.h | 20 -------------------- kernel/sched/core.c | 16 ---------------- 3 files changed, 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index fef3a809e7cf..5b08a8540ecf 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -3,42 +3,6 @@ #define _LINUX_ATOMIC_H #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. 
- */ -#ifndef smp_mb__before_atomic_inc -static inline void __deprecated smp_mb__before_atomic_inc(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_inc -static inline void __deprecated smp_mb__after_atomic_inc(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - -#ifndef smp_mb__before_atomic_dec -static inline void __deprecated smp_mb__before_atomic_dec(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_dec -static inline void __deprecated smp_mb__after_atomic_dec(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cbc5833fb221..be5fd38bd5a0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,26 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. - */ -#ifndef smp_mb__before_clear_bit -static inline void __deprecated smp_mb__before_clear_bit(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_clear_bit -static inline void __deprecated smp_mb__after_clear_bit(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1211575a2208..76c518c9b3a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -90,22 +90,6 @@ #define CREATE_TRACE_POINTS #include -#ifdef smp_mb__before_atomic -void __smp_mb__before_atomic(void) -{ - smp_mb__before_atomic(); -} -EXPORT_SYMBOL(__smp_mb__before_atomic); -#endif - -#ifdef smp_mb__after_atomic -void __smp_mb__after_atomic(void) -{ - smp_mb__after_atomic(); -} -EXPORT_SYMBOL(__smp_mb__after_atomic); -#endif - void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { unsigned long delta; -- cgit v1.2.3 From 7608a43d8f2e02f8b532f8e11481d7ecf8b5d3f9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:54 -0700 Subject: locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate 4badad35 ("locking/mutex: Disable optimistic spinning on some architectures") added an ARCH_SUPPORTS_ATOMIC_RMW flag to disable the mutex optimistic spinning feature on specific archs. Because CONFIG_MUTEX_SPIN_ON_OWNER only depended on DEBUG and SMP, it was OK to keep the condition guarding the ->owner field a bit loose. However, by adding a new variable to the mix we can end up wasting space on the unused field, i.e.: CONFIG_SMP && (!CONFIG_MUTEX_SPIN_ON_OWNER && !CONFIG_DEBUG_MUTEX). Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: Davidlohr Bueso Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Tim Chen Link: http://lkml.kernel.org/r/1406752916-3341-5-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- kernel/locking/mutex.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8d5535c58cc2..e4c29418f407 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -52,7 +52,7 @@ struct mutex { atomic_t count; spinlock_t wait_lock; struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 4115fbf83b12..5cda397607f2 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,7 +16,7 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline void mutex_set_owner(struct mutex *lock) { lock->owner = current; -- cgit v1.2.3 From 214e0aed639ef40987bf6159fad303171a6de31e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:55 -0700 Subject: locking/Documentation: Move locking related docs into Documentation/locking/ Specifically: Documentation/locking/lockdep-design.txt Documentation/locking/lockstat.txt Documentation/locking/mutex-design.txt Documentation/locking/rt-mutex-design.txt Documentation/locking/rt-mutex.txt Documentation/locking/spinlocks.txt Documentation/locking/ww-mutex-design.txt Signed-off-by: Davidlohr Bueso Acked-by: Randy Dunlap Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: Alexei Starovoitov Cc: Al Viro Cc: Andrew Morton Cc: Chris Mason Cc: Dan Streetman Cc: David Airlie Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Low Cc: Josef Bacik Cc: Kees Cook Cc: Linus Torvalds Cc: Lubomir Rintel Cc: Masanari Iida Cc: Paul E. 
McKenney Cc: Randy Dunlap Cc: Tim Chen Cc: Vineet Gupta Cc: fengguang.wu@intel.com Link: http://lkml.kernel.org/r/1406752916-3341-6-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- Documentation/00-INDEX | 2 + Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/lockdep-design.txt | 286 ----------- Documentation/locking/lockdep-design.txt | 286 +++++++++++ Documentation/locking/lockstat.txt | 178 +++++++ Documentation/locking/mutex-design.txt | 157 ++++++ Documentation/locking/rt-mutex-design.txt | 781 ++++++++++++++++++++++++++++++ Documentation/locking/rt-mutex.txt | 79 +++ Documentation/locking/spinlocks.txt | 167 +++++++ Documentation/locking/ww-mutex-design.txt | 344 +++++++++++++ Documentation/lockstat.txt | 178 ------- Documentation/mutex-design.txt | 157 ------ Documentation/rt-mutex-design.txt | 781 ------------------------------ Documentation/rt-mutex.txt | 79 --- Documentation/spinlocks.txt | 167 ------- Documentation/ww-mutex-design.txt | 344 ------------- MAINTAINERS | 4 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- include/linux/lockdep.h | 2 +- include/linux/mutex.h | 2 +- include/linux/rwsem.h | 2 +- kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- lib/Kconfig.debug | 4 +- 24 files changed, 2005 insertions(+), 2003 deletions(-) delete mode 100644 Documentation/lockdep-design.txt create mode 100644 Documentation/locking/lockdep-design.txt create mode 100644 Documentation/locking/lockstat.txt create mode 100644 Documentation/locking/mutex-design.txt create mode 100644 Documentation/locking/rt-mutex-design.txt create mode 100644 Documentation/locking/rt-mutex.txt create mode 100644 Documentation/locking/spinlocks.txt create mode 100644 Documentation/locking/ww-mutex-design.txt delete mode 100644 Documentation/lockstat.txt delete mode 100644 Documentation/mutex-design.txt delete mode 100644 Documentation/rt-mutex-design.txt delete mode 100644 Documentation/rt-mutex.txt delete mode 100644 Documentation/spinlocks.txt delete mode 100644 Documentation/ww-mutex-design.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 27e67a98b7be..1750fcef1ab4 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -287,6 +287,8 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. +locking/ + - directory with info about kernel locking primitives lockstat.txt - info on collecting statistics on locks (and contention). lockup-watchdogs.txt diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index e584ee12a1e7..7c9cc4846cb6 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1972,7 +1972,7 @@ machines due to caching. - Documentation/spinlocks.txt: + Documentation/locking/spinlocks.txt: Linus Torvalds' spinlocking tutorial in the kernel sources. diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt deleted file mode 100644 index 5dbc99c04f6e..000000000000 --- a/Documentation/lockdep-design.txt +++ /dev/null @@ -1,286 +0,0 @@ -Runtime locking correctness validator -===================================== - -started by Ingo Molnar -additions by Arjan van de Ven - -Lock-class ----------- - -The basic object the validator operates upon is a 'class' of locks. 
- -A class of locks is a group of locks that are logically the same with -respect to locking rules, even if the locks may have multiple (possibly -tens of thousands of) instantiations. For example a lock in the inode -struct is one class, while each inode has its own instantiation of that -lock class. - -The validator tracks the 'state' of lock-classes, and it tracks -dependencies between different lock-classes. The validator maintains a -rolling proof that the state and the dependencies are correct. - -Unlike an lock instantiation, the lock-class itself never goes away: when -a lock-class is used for the first time after bootup it gets registered, -and all subsequent uses of that lock-class will be attached to this -lock-class. - -State ------ - -The validator tracks lock-class usage history into 4n + 1 separate state bits: - -- 'ever held in STATE context' -- 'ever held as readlock in STATE context' -- 'ever held with STATE enabled' -- 'ever held as readlock with STATE enabled' - -Where STATE can be either one of (kernel/lockdep_states.h) - - hardirq - - softirq - - reclaim_fs - -- 'ever used' [ == !unused ] - -When locking rules are violated, these state bits are presented in the -locking error messages, inside curlies. A contrived example: - - modprobe/2287 is trying to acquire lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - but task is already holding lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - -The bit position indicates STATE, STATE-read, for each of the states listed -above, and the character displayed in each indicates: - - '.' acquired while irqs disabled and not in irq context - '-' acquired in irq context - '+' acquired with irqs enabled - '?' acquired in irq context with irqs enabled. - -Unused mutexes cannot be part of the cause of an error. - - -Single-lock state rules: ------------------------- - -A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The -following states are exclusive, and only one of them is allowed to be -set for any lock-class: - - <hardirq-safe> and <hardirq-unsafe> - <softirq-safe> and <softirq-unsafe> - -The validator detects and reports lock usage that violate these -single-lock state rules. - -Multi-lock dependency rules: ----------------------------- - -The same lock-class must not be acquired twice, because this could lead -to lock recursion deadlocks. - -Furthermore, two locks may not be taken in different order: - - <L1> -> <L2> - <L2> -> <L1> - -because this could lead to lock inversion deadlocks. (The validator -finds such dependencies in arbitrary complexity, i.e. there can be any -other locking sequence between the acquire-lock operations, the -validator will still track all dependencies between locks.) - -Furthermore, the following usage based lock dependencies are not allowed -between any two lock-classes: - - <hardirq-safe> -> <hardirq-unsafe> - <softirq-safe> -> <softirq-unsafe> - -The first rule comes from the fact the a hardirq-safe lock could be -taken by a hardirq context, interrupting a hardirq-unsafe lock - and -thus could result in a lock inversion deadlock. Likewise, a softirq-safe -lock could be taken by an softirq context, interrupting a softirq-unsafe -lock. - -The above rules are enforced for any locking sequence that occurs in the -kernel: when acquiring a new lock, the validator checks whether there is -any rule violation between the new lock and any of the held locks. - -When a lock-class changes its state, the following aspects of the above -dependency rules are enforced: - -- if a new hardirq-safe lock is discovered, we check whether it - took any hardirq-unsafe lock in the past. 
- -- if a new softirq-safe lock is discovered, we check whether it took - any softirq-unsafe lock in the past. - -- if a new hardirq-unsafe lock is discovered, we check whether any - hardirq-safe lock took it in the past. - -- if a new softirq-unsafe lock is discovered, we check whether any - softirq-safe lock took it in the past. - -(Again, we do these checks too on the basis that an interrupt context -could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which -could lead to a lock inversion deadlock - even if that lock scenario did -not trigger in practice yet.) - -Exception: Nested data dependencies leading to nested locking -------------------------------------------------------------- - -There are a few cases where the Linux kernel acquires more than one -instance of the same lock-class. Such cases typically happen when there -is some sort of hierarchy within objects of the same type. In these -cases there is an inherent "natural" ordering between the two objects -(defined by the properties of the hierarchy), and the kernel grabs the -locks in this fixed order on each of the objects. - -An example of such an object hierarchy that results in "nested locking" -is that of a "whole disk" block-dev object and a "partition" block-dev -object; the partition is "part of" the whole device and as long as one -always takes the whole disk lock as a higher lock than the partition -lock, the lock ordering is fully correct. The validator does not -automatically detect this natural ordering, as the locking rule behind -the ordering is not static. - -In order to teach the validator about this correct usage model, new -versions of the various locking primitives were added that allow you to -specify a "nesting level". An example call, for the block device mutex, -looks like this: - -enum bdev_bd_mutex_lock_class -{ - BD_MUTEX_NORMAL, - BD_MUTEX_WHOLE, - BD_MUTEX_PARTITION -}; - - mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); - -In this case the locking is done on a bdev object that is known to be a -partition. - -The validator treats a lock that is taken in such a nested fashion as a -separate (sub)class for the purposes of validation. - -Note: When changing code to use the _nested() primitives, be careful and -check really thoroughly that the hierarchy is correctly mapped; otherwise -you can get false positives or false negatives. - -Proof of 100% correctness: --------------------------- - -The validator achieves perfect, mathematical 'closure' (proof of locking -correctness) in the sense that for every simple, standalone single-task -locking sequence that occurred at least once during the lifetime of the -kernel, the validator proves it with a 100% certainty that no -combination and timing of these locking sequences can cause any class of -lock related deadlock. [*] - -I.e. complex multi-CPU and multi-task locking scenarios do not have to -occur in practice to prove a deadlock: only the simple 'component' -locking chains have to occur at least once (anytime, in any -task/context) for the validator to be able to prove correctness. (For -example, complex deadlocks that would normally need more than 3 CPUs and -a very unlikely constellation of tasks, irq-contexts and timings to -occur, can be detected on a plain, lightly loaded single-CPU system as -well!) 
- -This radically decreases the complexity of locking related QA of the -kernel: what has to be done during QA is to trigger as many "simple" -single-task locking dependencies in the kernel as possible, at least -once, to prove locking correctness - instead of having to trigger every -possible combination of locking interaction between CPUs, combined with -every possible hardirq and softirq nesting scenario (which is impossible -to do in practice). - -[*] assuming that the validator itself is 100% correct, and no other - part of the system corrupts the state of the validator in any way. - We also assume that all NMI/SMM paths [which could interrupt - even hardirq-disabled codepaths] are correct and do not interfere - with the validator. We also assume that the 64-bit 'chain hash' - value is unique for every lock-chain in the system. Also, lock - recursion must not be higher than 20. - -Performance: ------------- - -The above rules require _massive_ amounts of runtime checking. If we did -that for every lock taken and for every irqs-enable event, it would -render the system practically unusably slow. The complexity of checking -is O(N^2), so even with just a few hundred lock-classes we'd have to do -tens of thousands of checks for every event. - -This problem is solved by checking any given 'locking scenario' (unique -sequence of locks taken after each other) only once. A simple stack of -held locks is maintained, and a lightweight 64-bit hash value is -calculated, which hash is unique for every lock chain. The hash value, -when the chain is validated for the first time, is then put into a hash -table, which hash-table can be checked in a lockfree manner. If the -locking chain occurs again later on, the hash table tells us that we -dont have to validate the chain again. - -Troubleshooting: ----------------- - -The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. -Exceeding this number will trigger the following lockdep warning: - - (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) - -By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical -desktop systems have less than 1,000 lock classes, so this warning -normally results from lock-class leakage or failure to properly -initialize locks. These two problems are illustrated below: - -1. Repeated module loading and unloading while running the validator - will result in lock-class leakage. The issue here is that each - load of the module will create a new set of lock classes for - that module's locks, but module unloading does not remove old - classes (see below discussion of reuse of lock classes for why). - Therefore, if that module is loaded and unloaded repeatedly, - the number of lock classes will eventually reach the maximum. - -2. Using structures such as arrays that have large numbers of - locks that are not explicitly initialized. For example, - a hash table with 8192 buckets where each bucket has its own - spinlock_t will consume 8192 lock classes -unless- each spinlock - is explicitly initialized at runtime, for example, using the - run-time spin_lock_init() as opposed to compile-time initializers - such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize - the per-bucket spinlocks would guarantee lock-class overflow. - In contrast, a loop that called spin_lock_init() on each lock - would place all 8192 locks into a single lock class. - - The moral of this story is that you should always explicitly - initialize your locks. 
- -One might argue that the validator should be modified to allow -lock classes to be reused. However, if you are tempted to make this -argument, first review the code and think through the changes that would -be required, keeping in mind that the lock classes to be removed are -likely to be linked into the lock-dependency graph. This turns out to -be harder to do than to say. - -Of course, if you do run out of lock classes, the next thing to do is -to find the offending lock classes. First, the following command gives -you the number of lock classes currently in use along with the maximum: - - grep "lock-classes" /proc/lockdep_stats - -This command produces the following output on a modest system: - - lock-classes: 748 [max: 8191] - -If the number allocated (748 above) increases continually over time, -then there is likely a leak. The following command can be used to -identify the leaking lock classes: - - grep "BD" /proc/lockdep - -Run the command and save the output, then compare against the output from -a later run of this command to identify the leakers. This same output -can also help you find situations where runtime lock initialization has -been omitted. diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt new file mode 100644 index 000000000000..5dbc99c04f6e --- /dev/null +++ b/Documentation/locking/lockdep-design.txt @@ -0,0 +1,286 @@ +Runtime locking correctness validator +===================================== + +started by Ingo Molnar +additions by Arjan van de Ven + +Lock-class +---------- + +The basic object the validator operates upon is a 'class' of locks. + +A class of locks is a group of locks that are logically the same with +respect to locking rules, even if the locks may have multiple (possibly +tens of thousands of) instantiations. For example a lock in the inode +struct is one class, while each inode has its own instantiation of that +lock class. + +The validator tracks the 'state' of lock-classes, and it tracks +dependencies between different lock-classes. The validator maintains a +rolling proof that the state and the dependencies are correct. + +Unlike an lock instantiation, the lock-class itself never goes away: when +a lock-class is used for the first time after bootup it gets registered, +and all subsequent uses of that lock-class will be attached to this +lock-class. + +State +----- + +The validator tracks lock-class usage history into 4n + 1 separate state bits: + +- 'ever held in STATE context' +- 'ever held as readlock in STATE context' +- 'ever held with STATE enabled' +- 'ever held as readlock with STATE enabled' + +Where STATE can be either one of (kernel/lockdep_states.h) + - hardirq + - softirq + - reclaim_fs + +- 'ever used' [ == !unused ] + +When locking rules are violated, these state bits are presented in the +locking error messages, inside curlies. A contrived example: + + modprobe/2287 is trying to acquire lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + but task is already holding lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + +The bit position indicates STATE, STATE-read, for each of the states listed +above, and the character displayed in each indicates: + + '.' acquired while irqs disabled and not in irq context + '-' acquired in irq context + '+' acquired with irqs enabled + '?' acquired in irq context with irqs enabled. + +Unused mutexes cannot be part of the cause of an error. 
+ + +Single-lock state rules: +------------------------ + +A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The +following states are exclusive, and only one of them is allowed to be +set for any lock-class: + + <hardirq-safe> and <hardirq-unsafe> + <softirq-safe> and <softirq-unsafe> + +The validator detects and reports lock usage that violate these +single-lock state rules. + +Multi-lock dependency rules: +---------------------------- + +The same lock-class must not be acquired twice, because this could lead +to lock recursion deadlocks. + +Furthermore, two locks may not be taken in different order: + + <L1> -> <L2> + <L2> -> <L1> + +because this could lead to lock inversion deadlocks. (The validator +finds such dependencies in arbitrary complexity, i.e. there can be any +other locking sequence between the acquire-lock operations, the +validator will still track all dependencies between locks.) + +Furthermore, the following usage based lock dependencies are not allowed +between any two lock-classes: + + <hardirq-safe> -> <hardirq-unsafe> + <softirq-safe> -> <softirq-unsafe> + +The first rule comes from the fact the a hardirq-safe lock could be +taken by a hardirq context, interrupting a hardirq-unsafe lock - and +thus could result in a lock inversion deadlock. Likewise, a softirq-safe +lock could be taken by an softirq context, interrupting a softirq-unsafe +lock. + +The above rules are enforced for any locking sequence that occurs in the +kernel: when acquiring a new lock, the validator checks whether there is +any rule violation between the new lock and any of the held locks. + +When a lock-class changes its state, the following aspects of the above +dependency rules are enforced: + +- if a new hardirq-safe lock is discovered, we check whether it + took any hardirq-unsafe lock in the past. + +- if a new softirq-safe lock is discovered, we check whether it took + any softirq-unsafe lock in the past. + +- if a new hardirq-unsafe lock is discovered, we check whether any + hardirq-safe lock took it in the past. + +- if a new softirq-unsafe lock is discovered, we check whether any + softirq-safe lock took it in the past. + +(Again, we do these checks too on the basis that an interrupt context +could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which +could lead to a lock inversion deadlock - even if that lock scenario did +not trigger in practice yet.) + +Exception: Nested data dependencies leading to nested locking +------------------------------------------------------------- + +There are a few cases where the Linux kernel acquires more than one +instance of the same lock-class. Such cases typically happen when there +is some sort of hierarchy within objects of the same type. In these +cases there is an inherent "natural" ordering between the two objects +(defined by the properties of the hierarchy), and the kernel grabs the +locks in this fixed order on each of the objects. + +An example of such an object hierarchy that results in "nested locking" +is that of a "whole disk" block-dev object and a "partition" block-dev +object; the partition is "part of" the whole device and as long as one +always takes the whole disk lock as a higher lock than the partition +lock, the lock ordering is fully correct. The validator does not +automatically detect this natural ordering, as the locking rule behind +the ordering is not static. + +In order to teach the validator about this correct usage model, new +versions of the various locking primitives were added that allow you to +specify a "nesting level". 
An example call, for the block device mutex,
+looks like this:
+
+enum bdev_bd_mutex_lock_class
+{
+       BD_MUTEX_NORMAL,
+       BD_MUTEX_WHOLE,
+       BD_MUTEX_PARTITION
+};
+
+  mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fashion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hierarchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occurred at least once during the lifetime of the
+kernel, the validator proves with 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock-related deadlock. [*]
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+[*] assuming that the validator itself is 100% correct, and no other
+    part of the system corrupts the state of the validator in any way.
+    We also assume that all NMI/SMM paths [which could interrupt
+    even hardirq-disabled codepaths] are correct and do not interfere
+    with the validator. We also assume that the 64-bit 'chain hash'
+    value is unique for every lock-chain in the system. Also, lock
+    recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require _massive_ amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated; this hash is unique for every lock chain. When a chain is
+validated for the first time, its hash value is put into a hash table,
+which can be checked in a lock-free manner. If the locking chain occurs
+again later on, the hash table tells us that we don't have to validate
+the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have fewer than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks. These two problems are illustrated below:
+
+1.	Repeated module loading and unloading while running the validator
+	will result in lock-class leakage. The issue here is that each
+	load of the module will create a new set of lock classes for
+	that module's locks, but module unloading does not remove old
+	classes (see below discussion of reuse of lock classes for why).
+	Therefore, if that module is loaded and unloaded repeatedly,
+	the number of lock classes will eventually reach the maximum.
+
+2.	Using structures such as arrays that have large numbers of
+	locks that are not explicitly initialized. For example,
+	a hash table with 8192 buckets where each bucket has its own
+	spinlock_t will consume 8192 lock classes -unless- each spinlock
+	is explicitly initialized at runtime, for example, using the
+	run-time spin_lock_init() as opposed to compile-time initializers
+	such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize
+	the per-bucket spinlocks would guarantee lock-class overflow.
+	In contrast, a loop that called spin_lock_init() on each lock
+	would place all 8192 locks into a single lock class.
+
+	The moral of this story is that you should always explicitly
+	initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused. However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph. This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes. First, the following command gives
+you the number of lock classes currently in use along with the maximum:
+
+	grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system:
+
+	lock-classes:  748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak. The following command can be used to
+identify the leaking lock classes:
+
+	grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers. This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
new file mode 100644
index 000000000000..7428773a1e69
--- /dev/null
+++ b/Documentation/locking/lockstat.txt
@@ -0,0 +1,178 @@
+
+LOCK STATISTICS
+
+- WHAT
+
+As the name suggests, it provides statistics on locks.
+
+- WHY
+
+Because things like lock contention can severely impact performance.
+
+- HOW
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
+
+        __acquire
+            |
+           lock _____
+            |        \
+            |    __contended
+            |         |
+            |       <wait>
+            | _______/
+            |/
+            |
+       __acquired
+            |
+            .
+          <hold>
+            .
+            |
+       __release
+            |
+         unlock
+
+lock, unlock	- the regular lock functions
+__*		- the hooks
+<>		- states
+
+With these hooks we provide the following statistics:
+
+ con-bounces     - number of lock contentions that involved x-cpu data
+ contentions     - number of lock acquisitions that had to wait
+ wait time min   - shortest (non-0) time we ever had to wait for a lock
+           max   - longest time we ever had to wait for a lock
+           total - total time we spent waiting on this lock
+           avg   - average time spent waiting on this lock
+ acq-bounces     - number of lock acquisitions that involved x-cpu data
+ acquisitions    - number of times we took the lock
+ hold time min   - shortest (non-0) time we ever held the lock
+           max   - longest time we ever held the lock
+           total - total time this lock was held
+           avg   - average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+ - CONFIGURATION
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+ - USAGE
+
+Enable collection of statistics:
+
+# echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics:
+
+# echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics:
+
+( line numbers not part of actual output, done for clarity in the explanation
+  below )
+
+# less /proc/lock_stat
+
+01 lock_stat version 0.4
+02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
+04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+05
+06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
+07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
+08                           ---------------
+09                             &mm->mmap_sem              1          [] khugepaged_scan_mm_slot+0x57/0x280
+10                             &mm->mmap_sem             96          [] __do_page_fault+0x1d4/0x510
+11                             &mm->mmap_sem             34          [] vm_mmap_pgoff+0x87/0xd0
+12                             &mm->mmap_sem             17          [] vm_munmap+0x41/0x80
+13                           ---------------
+14                             &mm->mmap_sem              1          [] dup_mmap+0x2a/0x3f0
+15                             &mm->mmap_sem             60          [] SyS_mprotect+0xe9/0x250
+16                             &mm->mmap_sem             41          [] __do_page_fault+0x1d4/0x510
+17                             &mm->mmap_sem             68          [] vm_mmap_pgoff+0x87/0xd0
+18
+19.............................................................................................................................................................................................................................
+20
+21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
+22                         ---------------
+23                           unix_table_lock             45          [] unix_create1+0x16e/0x1b0
+24                           unix_table_lock             47          [] unix_release_sock+0x31/0x250
+25                           unix_table_lock             15          [] unix_find_other+0x117/0x230
+26                           unix_table_lock              5          [] unix_autobind+0x11f/0x1b0
+27                         ---------------
+28                           unix_table_lock             39          [] unix_release_sock+0x31/0x250
+29                           unix_table_lock             49          [] unix_create1+0x16e/0x1b0
+30                           unix_table_lock             20          [] unix_find_other+0x117/0x230
+31                           unix_table_lock              4          [] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Lines
+02-04 show the header with column descriptions. Lines 05-18 and 20-31 show
+the actual statistics. These statistics come in two parts; the actual stats
+separated by a short separator (lines 08, 13) from the contention points.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and the [<IP>] symbol of the contending call
+site. The second set of contention points are the points we're contending
+with.
+
+The integer part of the time values is in microseconds (us).
+
+Dealing with nested locks, subclasses may appear:
+
+32...........................................................................................................................................................................................................................
+33
+34                               &rq->lock:         13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
+35                               ---------
+36                                 &rq->lock           645          [] task_rq_lock+0x43/0x75
+37                                 &rq->lock           297          [] try_to_wake_up+0x127/0x25a
+38                                 &rq->lock           360          [] select_task_rq_fair+0x1f0/0x74a
+39                                 &rq->lock           428          [] scheduler_tick+0x46/0x1fb
+40                               ---------
+41                                 &rq->lock            77          [] task_rq_lock+0x43/0x75
+42                                 &rq->lock           174          [] try_to_wake_up+0x127/0x25a
+43                                 &rq->lock          4715          [] double_rq_lock+0x42/0x54
+44                                 &rq->lock           893          [] schedule+0x157/0x7b8
+45
+46...........................................................................................................................................................................................................................
+47
+48                             &rq->lock/1:          1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
+49                             -----------
+50                               &rq->lock/1         11526          [] double_rq_lock+0x4f/0x54
+51                             -----------
+52                               &rq->lock/1          5645          [] double_rq_lock+0x42/0x54
+53                               &rq->lock/1          1224          [] schedule+0x157/0x7b8
+54                               &rq->lock/1          4336          [] double_rq_lock+0x4f/0x54
+55                               &rq->lock/1           181          [] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
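Where does such a subclass come from? Typically from a _nested() annotation
at a call site that legitimately takes two locks of the same class. A hedged
sketch of how a function like double_rq_lock might produce the "/1" entries
above (the struct is illustrative; only the locking calls are real API):

    #include <linux/spinlock.h>

    struct rq {
            raw_spinlock_t lock;
            /* ... per-cpu runqueue data ... */
    };

    /*
     * Sketch: lock two runqueues in address order. The inner acquisition
     * is annotated with subclass SINGLE_DEPTH_NESTING (1), which is what
     * shows up as "&rq->lock/1" in the statistics above.
     */
    static void double_rq_lock_sketch(struct rq *rq1, struct rq *rq2)
    {
            if (rq1 < rq2) {
                    raw_spin_lock(&rq1->lock);
                    raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
            } else {
                    raw_spin_lock(&rq2->lock);
                    raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
            }
    }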
+
+View the top contending locks:
+
+# grep : /proc/lock_stat | head
+              clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
+           tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
+        &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
+                     &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
+     &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
+               tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
+               tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
+                    rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
+ &(&dentry->d_lockref.lock)->rlock:     39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
+                    rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
+
+Clear the statistics:
+
+# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
new file mode 100644
index 000000000000..ee231ed09ec6
--- /dev/null
+++ b/Documentation/locking/mutex-design.txt
@@ -0,0 +1,157 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar
+updated by Davidlohr Bueso
+
+What are mutexes?
+-----------------
+
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
+
+[1] http://lwn.net/Articles/164802/
+
+Implementation
+--------------
+
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three-state
+atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
+
+	  1: unlocked
+	  0: locked, no waiters
+   negative: locked, with potential waiters
+
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock's task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
+
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
+
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:
+
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
+
+   the unlocking fastpath is equally tight:
+
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
+
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched).
The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using an MCS lock so that only
+     one spinner can compete for the mutex.
+
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     in sleeping-lock implementations. An important feature of the customized
+     MCS lock is that spinners are able to exit the MCS spinlock queue when
+     they need to reschedule. This further helps avoid situations where MCS
+     spinners that need to reschedule would continue waiting to spin on the
+     mutex owner, only to go directly to the slowpath upon obtaining the
+     MCS lock.
+
+
+(iii) slowpath: last resort, if the lock still cannot be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG_DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
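As a quick illustration ahead of the interface list that follows, here is a
minimal usage sketch; the device name and function are hypothetical, only the
mutex API calls are real:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(my_dev_mutex);      /* statically defined mutex */

    static int my_dev_update(void)
    {
            int ret;

            /* Sleeps until acquired; fails with -EINTR if a signal arrives. */
            ret = mutex_lock_interruptible(&my_dev_mutex);
            if (ret)
                    return ret;

            /* ... critical section: only one task can be here at a time ... */

            mutex_unlock(&my_dev_mutex);    /* only the owner may do this */
            return 0;
    }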
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+                                       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);
+
+Disadvantages
+-------------
+
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g., on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.
+
+When to use mutexes
+-------------------
+
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt
new file mode 100644
index 000000000000..8666070d3189
--- /dev/null
+++ b/Documentation/locking/rt-mutex-design.txt
@@ -0,0 +1,781 @@
+#
+# Copyright (c) 2006 Steven Rostedt
+# Licensed under the GNU Free Documentation License, Version 1.2
+#
+
+RT-mutex implementation design
+------------------------------
+
+This document tries to describe the design of the rtmutex.c implementation.
+It doesn't describe the reasons why rtmutex.c exists. For that please see
+Documentation/rt-mutex.txt. This document does explain problems that arise
+without this code, but only as background for understanding what the code
+actually does.
+
+The goal of this document is to help others understand the priority
+inheritance (PI) algorithm that is used, as well as reasons for the
+decisions that were made to implement PI in the manner that was done.
+
+
+Unbounded Priority Inversion
+----------------------------
+
+Priority inversion is when a lower priority process executes while a higher
+priority process wants to run. This happens for several reasons, and
+most of the time it can't be helped. Anytime a high priority process wants
+to use a resource that a lower priority process has (a mutex for example),
+the high priority process must wait until the lower priority process is done
+with the resource. This is a priority inversion. What we want to prevent
+is something called unbounded priority inversion. That is when the high
+priority process is prevented from running by a lower priority process for
+an undetermined amount of time.
+
+The classic example of unbounded priority inversion is where you have three
+processes, let's call them processes A, B, and C, where A is the highest
+priority process, C is the lowest, and B is in between. A tries to grab a lock
+that C owns, so A must wait, letting C run until it releases the lock. But in
+the meantime, B executes, and since B is of a higher priority than C, it
+preempts C, but by doing so, it is in fact preempting A which is a higher
+priority process.
+Now there's no way of knowing how long A will be sleeping waiting for C
+to release the lock, because for all we know, B is a CPU hog and will
+never give C a chance to release the lock. This is called unbounded priority
+inversion.
+
+Here's a little ASCII art to show the problem.
+
+   grab lock L1 (owned by C)
+     |
+A ---+
+       C preempted by B
+         |
+C    +----+
+
+B         +-------->
+               B now keeps A from running.
+
+
+Priority Inheritance (PI)
+-------------------------
+
+There are several ways to solve this issue, but other ways are out of scope
+for this document. Here we only discuss PI.
+
+PI is where a process inherits the priority of another process if the other
+process blocks on a lock owned by the current process. To make this easier
+to understand, let's use the previous example, with processes A, B, and C again.
+
+This time, when A blocks on the lock owned by C, C would inherit the priority
+of A. So now if B becomes runnable, it would not preempt C, since C now has
+the high priority of A. As soon as C releases the lock, it loses its
+inherited priority, and A then can continue with the resource that C had.
+
+Terminology
+-----------
+
+Here I explain some terminology that is used in this document to help describe
+the design that is used to implement PI.
+
+PI chain - The PI chain is an ordered series of locks and processes that cause
+           processes to inherit priorities from a previous process that is
+           blocked on one of its locks. This is described in more detail
+           later in this document.
+
+mutex    - In this document, to differentiate the locks that implement PI
+           from the spin locks that are used in the PI code, the PI locks
+           will from now on be called mutexes.
+
+lock     - In this document from now on, I will use the term lock when
+           referring to spin locks that are used to protect parts of the PI
+           algorithm. These locks disable preemption for UP (when
+           CONFIG_PREEMPT is enabled) and on SMP prevent multiple CPUs from
+           entering critical sections simultaneously.
+
+spin lock - Same as lock above.
+
+waiter   - A waiter is a struct that is stored on the stack of a blocked
+           process. Since the scope of the waiter is within the code for
+           a process being blocked on the mutex, it is fine to allocate
+           the waiter on the process's stack (local variable). This
+           structure holds a pointer to the task, as well as the mutex that
+           the task is blocked on. It also has the plist node structures to
+           place the task in the waiter_list of a mutex as well as the
+           pi_list of a mutex owner task (described below).
+
+           waiter is sometimes used in reference to the task that is waiting
+           on a mutex. This is the same as waiter->task.
+
+waiters  - A list of processes that are blocked on a mutex.
+
+top waiter - The highest priority process waiting on a specific mutex.
+
+top pi waiter - The highest priority process waiting on one of the mutexes
+                that a specific process owns.
+
+Note: task and process are used interchangeably in this document, mostly to
+      differentiate between two processes that are being described together.
+
+
+PI chain
+--------
+
+The PI chain is a list of processes and mutexes that may cause priority
+inheritance to take place. Multiple chains may converge, but a chain
+would never diverge, since a process can't be blocked on more than one
+mutex at a time.
+
+Example:
+
+   Process:  A, B, C, D, E
+   Mutexes:  L1, L2, L3, L4
+
+   A owns: L1
+           B blocked on L1
+   B owns L2
+           C blocked on L2
+   C owns L3
+           D blocked on L3
+   D owns L4
+           E blocked on L4
+
+The chain would be:
+
+   E->L4->D->L3->C->L2->B->L1->A
+
+To show where two chains merge, we could add another process F and
+another mutex L5 where B owns L5 and F is blocked on mutex L5.
+
+The chain for F would be:
+
+   F->L5->B->L1->A
+
+Since a process may own more than one mutex, but never be blocked on more than
+one, the chains merge.
+
+Here we show both chains:
+
+   E->L4->D->L3->C->L2-+
+                       |
+                       +->B->L1->A
+                       |
+                 F->L5-+
+
+For PI to work, the processes at the right end of these chains (or we may
+also call it the Top of the chain) must be equal to or higher in priority
+than the processes to the left or below in the chain.
+
+Also since a mutex may have more than one process blocked on it, we can
+have multiple chains merge at mutexes. If we add another process G that is
+blocked on mutex L2:
+
+   G->L2->B->L1->A
+
+And once again, to show how this can grow I will show the merging chains
+again.
+
+   E->L4->D->L3->C-+
+                   +->L2-+
+                   |     |
+                 G-+     +->B->L1->A
+                         |
+                   F->L5-+
+
+
+Plist
+-----
+
+Before I go further and talk about how the PI chain is stored through lists
+on both mutexes and processes, I'll explain the plist. This is similar to
+the struct list_head functionality that is already in the kernel.
+The implementation of plist is out of scope for this document, but it is
+very important to understand what it does.
+
+There are a few differences between plist and list, the most important one
+being that plist is a priority sorted linked list. This means that the
+priorities of the plist are sorted, such that it takes O(1) to retrieve the
+highest priority item in the list. Obviously this is useful to store processes
+based on their priorities.
+
+Another difference, which is important for implementation, is that, unlike
+list, the head of the list is a different element than the nodes of a list.
+So the head of the list is declared as struct plist_head and nodes that will
+be added to the list are declared as struct plist_node.
+
+
+Mutex Waiter List
+-----------------
+
+Every mutex keeps track of all the waiters that are blocked on itself. The mutex
+has a plist to store these waiters by priority. This list is protected by
+a spin lock that is located in the struct of the mutex. This lock is called
+wait_lock. Since the modification of the waiter list is never done in
+interrupt context, the wait_lock can be taken without disabling interrupts.
+
+
+Task PI List
+------------
+
+To keep track of the PI chains, each process has its own PI list. This is
+a list of all top waiters of the mutexes that are owned by the process.
+Note that this list only holds the top waiters and not all waiters that are
+blocked on mutexes owned by the process.
+
+The top of the task's PI list is always the highest priority task that
+is waiting on a mutex that is owned by the task. So if the task has
+inherited a priority, it will always be the priority of the task that is
+at the top of this list.
+
+This list is stored in the task structure of a process as a plist called
+pi_list. This list is protected by a spin lock also in the task structure,
+called pi_lock. This lock may also be taken in interrupt context, so when
+locking the pi_lock, interrupts must be disabled.
+
+
+Depth of the PI Chain
+---------------------
+
+The maximum depth of the PI chain is not dynamic, and could actually be
+defined.
But it is very complex to figure out, since it depends on all
+the nesting of mutexes. Let's look at the example where we have 3 mutexes,
+L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
+The following shows a locking order of L1->L2->L3, but may not actually
+be directly nested that way.
+
+void func1(void)
+{
+	mutex_lock(L1);
+
+	/* do anything */
+
+	mutex_unlock(L1);
+}
+
+void func2(void)
+{
+	mutex_lock(L1);
+	mutex_lock(L2);
+
+	/* do something */
+
+	mutex_unlock(L2);
+	mutex_unlock(L1);
+}
+
+void func3(void)
+{
+	mutex_lock(L2);
+	mutex_lock(L3);
+
+	/* do something else */
+
+	mutex_unlock(L3);
+	mutex_unlock(L2);
+}
+
+void func4(void)
+{
+	mutex_lock(L3);
+
+	/* do something again */
+
+	mutex_unlock(L3);
+}
+
+Now we add 4 processes that run each of these functions separately.
+Processes A, B, C, and D, which run functions func1, func2, func3 and func4
+respectively, such that D runs first and A last. With D being preempted
+in func4 in the "do something again" area, we end up with the following
+locking:
+
+D owns L3
+        C blocked on L3
+C owns L2
+        B blocked on L2
+B owns L1
+        A blocked on L1
+
+And thus we have the chain A->L1->B->L2->C->L3->D.
+
+This gives us a PI depth of 4 (four processes), but looking at any of the
+functions individually, it seems as though they only have at most a locking
+depth of two. So, although the locking depth is defined at compile time,
+it still is very difficult to find the possibilities of that depth.
+
+Now since mutexes can be defined by user-land applications, we don't want a DoS
+type of application that nests large amounts of mutexes to create a large
+PI chain, and have the code holding spin locks while looking at a large
+amount of data. So to prevent this, the implementation not only implements
+a maximum lock depth, but also only holds at most two different locks at a
+time, as it walks the PI chain. More about this below.
+
+
+Mutex owner and flags
+---------------------
+
+The mutex structure contains a pointer to the owner of the mutex. If the
+mutex is not owned, this owner is set to NULL. Since all architectures
+have the task structure on at least a four-byte alignment (and if this is
+not true, the rtmutex.c code will be broken!), this allows for the two
+least significant bits to be used as flags. This part is also described
+in Documentation/rt-mutex.txt, but will also be briefly described here.
+
+Bit 0 is used as the "Pending Owner" flag. This is described later.
+Bit 1 is used as the "Has Waiters" flag. This is also described later
+  in more detail, but is set whenever there are waiters on a mutex.
+
+
+cmpxchg Tricks
+--------------
+
+Some architectures implement an atomic cmpxchg (Compare and Exchange). This
+is used (when applicable) to keep the fast path of grabbing and releasing
+mutexes short.
+
+cmpxchg is basically the following function performed atomically:
+
+unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+{
+	unsigned long T = *A;
+
+	if (*A == *B)
+		*A = *C;
+
+	return T;
+}
+#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+
+This is really nice to have, since it allows you to only update a variable
+if the variable is what you expect it to be. You know it succeeded if
+the return value (the old value of A) is equal to B.
+
+The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
+the architecture does not support CMPXCHG, then this macro is simply set
+to fail every time.
But if CMPXCHG is supported, it helps
+greatly in keeping the fast path short.
+
+The use of rt_mutex_cmpxchg with the flags in the owner field helps optimize
+the system for architectures that support it. This will also be explained
+later in this document.
+
+
+Priority adjustments
+--------------------
+
+The implementation of the PI code in rtmutex.c has several places where a
+process must adjust its priority. With the help of the pi_list of a
+process, it is rather easy to know what needs to be adjusted.
+
+The functions implementing the task adjustments are rt_mutex_adjust_prio,
+__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
+to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
+
+rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
+
+rt_mutex_getprio returns the priority that the task should have. Either the
+task's own normal priority, or if a process of a higher priority is waiting on
+a mutex owned by the task, then that higher priority should be returned.
+Since the pi_list of a task holds a priority-ordered list of all the top
+waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
+to compare the top pi waiter to its own normal priority, and return the higher
+priority back.
+
+(Note: if looking at the code, you will notice that the lower number of
+ prio is returned. This is because the prio field in the task structure
+ is an inverse order of the actual priority. So a "prio" of 5 is
+ of higher priority than a "prio" of 10.)
+
+__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
+result does not equal the task's current priority, then rt_mutex_setprio
+is called to adjust the priority of the task to the new priority.
+Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
+actual change in priority.
+
+It is interesting to note that __rt_mutex_adjust_prio can either increase
+or decrease the priority of the task. In the case that a higher priority
+process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
+would increase/boost the task's priority. But if a higher priority task
+were for some reason to leave the mutex (timeout or signal), this same function
+would decrease/unboost the priority of the task. That is because the pi_list
+always contains the highest priority task that is waiting on a mutex owned
+by the task, so we only need to compare the priority of that top pi waiter
+to the normal priority of the given task.
+
+
+High level overview of the PI chain walk
+----------------------------------------
+
+The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
+
+The implementation has gone through several iterations, and has ended up
+with what we believe is the best. It walks the PI chain by only grabbing
+at most two locks at a time, and is very efficient.
+
+The rt_mutex_adjust_prio_chain can be used either to boost or lower process
+priorities.
+
+rt_mutex_adjust_prio_chain is called with a task to be checked for PI
+(de)boosting (the owner of a mutex that a process is blocking on), a flag to
+check for deadlocking, the mutex that the task owns, and a pointer to a waiter
+that is the process's waiter struct that is blocked on the mutex (although this
+parameter may be NULL for deboosting).
+
+For this explanation, I will not mention deadlock detection. This explanation
+will try to stay at a high level.
+
+When this function is called, there are no locks held.
That also means
+that the state of the owner and lock can change while this function is
+executing.
+
+Before this function is called, the task has already had rt_mutex_adjust_prio
+performed on it. This means that the task is set to the priority that it
+should be at, but the plist nodes of the task's waiter have not been updated
+with the new priorities, and this task may not be in the proper locations
+in the pi_lists and wait_lists that the task is blocked on. This function
+solves all that.
+
+A loop is entered, where task is the owner to be checked for PI changes that
+was passed in as a parameter (for the first iteration). The pi_lock of this
+task is taken to prevent any more changes to the pi_list of the task. This
+also prevents new tasks from completing the blocking on a mutex that is owned
+by this task.
+
+If the task is not blocked on a mutex then the loop is exited. We are at
+the top of the PI chain.
+
+A check is now done to see if the original waiter (the process that is blocked
+on the current mutex) is the top pi waiter of the task. That is, is this
+waiter on the top of the task's pi_list? If it is not, it either means that
+there is another process higher in priority that is blocked on one of the
+mutexes that the task owns, or that the waiter has just woken up via a signal
+or timeout and has left the PI chain. In either case, the loop is exited, since
+we don't need to do any more changes to the priority of the current task, or any
+task that owns a mutex that this current task is waiting on. A priority chain
+walk is only needed when a new top pi waiter is made to a task.
+
+The next check sees if the task's waiter plist node has the priority equal to
+the priority the task is set at. If they are equal, then we are done with
+the loop. Remember that the function started with the priority of the
+task adjusted, but the plist nodes that hold the task in other processes'
+pi_lists have not been adjusted.
+
+Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
+is taken. This is done by a spin_trylock, because the locking order of the
+pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
+lock, the pi_lock is released, and we restart the loop.
+
+Now that we have both the pi_lock of the task as well as the wait_lock of
+the mutex the task is blocked on, we update the task's waiter's plist node
+that is located on the mutex's wait_list.
+
+Now we release the pi_lock of the task.
+
+Next the owner of the mutex has its pi_lock taken, so we can update the
+task's entry in the owner's pi_list. If the task is the highest priority
+process on the mutex's wait_list, then we remove the previous top waiter
+from the owner's pi_list, and replace it with the task.
+
+Note: It is possible that the task was the current top waiter on the mutex,
+      in which case the task is not yet on the pi_list of the waiter. This
+      is OK, since plist_del does nothing if the plist node is not on any
+      list.
+
+If the task was not the top waiter of the mutex, but it was before we
+did the priority updates, that means we are deboosting/lowering the
+task. In this case, the task is removed from the pi_list of the owner,
+and the new top waiter is added.
+
+Lastly, we unlock both the pi_lock of the task, as well as the mutex's
+wait_lock, and continue the loop again. On the next iteration of the
+loop, the previous owner of the mutex will be the task that will be
+processed.
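The loop just described can be condensed into a rough pseudo-C outline. This
is a sketch only: the helper names below are made up for illustration, and
the real rt_mutex_adjust_prio_chain also handles deadlock detection and the
maximum walk depth:

    /* Illustrative pseudo-code, not the kernel's actual implementation. */
    for (;;) {
            spin_lock(&task->pi_lock);

            waiter = task->blocked_on;              /* NULL: top of chain */
            if (!waiter || waiter != top_pi_waiter(task) ||
                waiter_prio(waiter) == task->prio)
                    break;                          /* nothing (more) to fix up */

            mutex = waiter->lock;
            if (!spin_trylock(&mutex->wait_lock)) {
                    /* Locking order is reversed here: drop and retry. */
                    spin_unlock(&task->pi_lock);
                    continue;
            }

            requeue_waiter(mutex, waiter);          /* new priority on wait_list */
            spin_unlock(&task->pi_lock);

            owner = mutex_owner(mutex);
            spin_lock(&owner->pi_lock);
            update_top_waiter(owner, waiter);       /* fix up owner's pi_list */

            spin_unlock(&owner->pi_lock);
            spin_unlock(&mutex->wait_lock);

            task = owner;                           /* walk up the chain */
    }
    spin_unlock(&task->pi_lock);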
+
+Note: One might think that the owner of this mutex might have changed
+      since we just grabbed the mutex's wait_lock. And one could be right.
+      The important thing to remember is that the owner could not have
+      become the task that is being processed in the PI chain, since
+      we have taken that task's pi_lock at the beginning of the loop.
+      So as long as there is an owner of this mutex that is not the same
+      process as the task being worked on, we are OK.
+
+      Looking closely at the code, one might be confused. The check for the
+      end of the PI chain is when the task isn't blocked on anything or the
+      task's waiter structure "task" element is NULL. This check is
+      protected only by the task's pi_lock. But the code to unlock the mutex
+      sets the task's waiter structure "task" element to NULL with only
+      the protection of the mutex's wait_lock, which was not taken yet.
+      Isn't this a race condition if the task becomes the new owner?
+
+      The answer is No! The trick is the spin_trylock of the mutex's
+      wait_lock. If we fail that lock, we release the pi_lock of the
+      task and continue the loop, doing the end of PI chain check again.
+
+      In the code to release the lock, the wait_lock of the mutex is held
+      the entire time, and it is not let go when we grab the pi_lock of the
+      new owner of the mutex. So if the switch of a new owner were to happen
+      after the check for end of the PI chain and the grabbing of the
+      wait_lock, the unlocking code would spin on the new owner's pi_lock
+      but never give up the wait_lock. So the PI chain loop is guaranteed to
+      fail the spin_trylock on the wait_lock, release the pi_lock, and
+      try again.
+
+      If you don't quite understand the above, that's OK. You don't have to,
+      unless you really want to make a proof out of it ;)
+
+
+Pending Owners and Lock stealing
+--------------------------------
+
+One of the flags in the owner field of the mutex structure is "Pending Owner".
+What this means is that an owner was chosen by the process releasing the
+mutex, but that owner has yet to wake up and actually take the mutex.
+
+Why is this important? Why can't we just give the mutex to another process
+and be done with it?
+
+The PI code is to help with real-time processes, and to let the highest
+priority process run as long as possible with as little latency and delay
+as possible. If a high priority process owns a mutex that a lower priority
+process is blocked on, when the mutex is released it would be given to the
+lower priority process. What if the higher priority process wants to take
+that mutex again? The high priority process would fail to take the mutex
+that it just gave up, and it would need to boost the lower priority process
+and wait for the full latency of that critical section (since the low
+priority process just entered it).
+
+There's no reason a high priority process that gives up a mutex should be
+penalized if it tries to take that mutex again. If the new owner of the
+mutex has not woken up yet, there's no reason that the higher priority process
+could not take that mutex away.
+
+To solve this, we introduced Pending Ownership and Lock Stealing. When a
+new process is given a mutex that it was blocked on, it is only given
+pending ownership. This means that it's the new owner, unless a higher
+priority process comes in and tries to grab that mutex. If a higher priority
+process does come along and wants that mutex, we let the higher priority
+process "steal" the mutex from the pending owner (only if it is still pending)
+and continue with the mutex.
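To picture the owner field's encoding, here is a small hedged sketch (macro
and helper names are illustrative; it assumes only the at-least-four-byte
alignment of the task structure stated earlier):

    #include <linux/types.h>

    struct task_struct;

    #define RT_MUTEX_OWNER_PENDING  0x1UL   /* bit 0: "Pending Owner" */
    #define RT_MUTEX_HAS_WAITERS    0x2UL   /* bit 1: "Has Waiters" */
    #define RT_MUTEX_FLAG_MASK      0x3UL

    /* Mask off the two flag bits to recover the task pointer. */
    static inline struct task_struct *owner_task(unsigned long owner_field)
    {
            return (struct task_struct *)(owner_field & ~RT_MUTEX_FLAG_MASK);
    }

    static inline bool owner_pending(unsigned long owner_field)
    {
            return owner_field & RT_MUTEX_OWNER_PENDING;
    }

    static inline bool has_waiters(unsigned long owner_field)
    {
            return owner_field & RT_MUTEX_HAS_WAITERS;
    }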
+
+
+Taking of a mutex (The walk through)
+------------------------------------
+
+OK, now let's take a look at the detailed walk through of what happens when
+taking a mutex.
+
+The first thing that is tried is the fast taking of the mutex. This is
+done when we have CMPXCHG enabled (otherwise the fast taking automatically
+fails). Only when the owner field of the mutex is NULL can the lock be
+taken with the CMPXCHG and nothing else needs to be done.
+
+If there is contention on the lock, whether it is owned or has a pending
+owner, we take the slow path (rt_mutex_slowlock).
+
+The slow path function is where the task's waiter structure is created on
+the stack. This is because the waiter structure is only needed for the
+scope of this function. The waiter structure holds the nodes to store
+the task on the wait_list of the mutex, and if need be, the pi_list of
+the owner.
+
+The wait_lock of the mutex is taken since the slow path of unlocking the
+mutex also takes this lock.
+
+We then call try_to_take_rt_mutex. This is where an architecture that
+does not implement CMPXCHG would always grab the lock (if there's no
+contention).
+
+try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
+slow path. The first thing that is done here is an atomic setting of
+the "Has Waiters" flag of the mutex's owner field. Yes, this could really
+be false, because if the mutex has no owner, there are no waiters and
+the current task also won't have any waiters. But we don't have the lock
+yet, so we assume we are going to be a waiter. The reason for this is to
+play nice with those architectures that do have CMPXCHG. By setting this flag
+now, the owner of the mutex can't release the mutex without going into the
+slow unlock path, and it would then need to grab the wait_lock, which this
+code currently holds. So setting the "Has Waiters" flag forces the owner
+to synchronize with this code.
+
+Now that we know that we can't have any races with the owner releasing the
+mutex, we check to see if we can take the ownership. This is done if the
+mutex doesn't have an owner, or if we can steal the mutex from a pending
+owner. Let's look at the situations we have here.
+
+  1) Has owner that is pending
+  ----------------------------
+
+  The mutex has an owner, but it hasn't woken up and the mutex flag
+  "Pending Owner" is set. The first check is to see if the owner isn't the
+  current task. This is because this function is also used for the pending
+  owner to grab the mutex. When a pending owner wakes up, it checks to see
+  if it can take the mutex, and this is done if the owner is already set to
+  itself. If so, we succeed and leave the function, clearing the "Pending
+  Owner" bit.
+
+  If the pending owner is not current, we check to see if the current priority is
+  higher than the pending owner's. If not, we fail the function and return.
+
+  There's also something special about a pending owner. That is, a pending owner
+  is never blocked on a mutex. So there is no PI chain to worry about. It also
+  means that if the mutex doesn't have any waiters, there's no accounting needed
+  to update the pending owner's pi_list, since we only worry about processes
+  blocked on the current mutex.
+
+  If there are waiters on this mutex, and we just stole the ownership, we need
+  to take the top waiter, remove it from the pi_list of the pending owner, and
+  add it to the current pi_list. Note that at this moment, the pending owner
+  is no longer on the list of waiters.
This is fine, since the pending owner
+  would add itself back when it realizes that it had the ownership stolen
+  from itself. When the pending owner tries to grab the mutex, it will fail
+  in try_to_take_rt_mutex if the owner field points to another process.
+
+  2) No owner
+  -----------
+
+  If there is no owner (or we successfully stole the lock), we set the owner
+  of the mutex to current, and set the flag of "Has Waiters" if the current
+  mutex actually has waiters, or we clear the flag if it doesn't. See, it was
+  OK that we set that flag early, since now it is cleared.
+
+  3) Failed to grab ownership
+  ---------------------------
+
+  The most interesting case is when we fail to take ownership. This means that
+  there exists an owner, or there's a pending owner with equal or higher
+  priority than the current task.
+
+We'll continue on the failed case.
+
+If the mutex has a timeout, we set up a timer to go off to break us out
+of this mutex if we failed to get it after a specified amount of time.
+
+Now we enter a loop that will continue to try to take ownership of the mutex, or
+fail from a timeout or signal.
+
+Once again we try to take the mutex. This will usually fail the first time
+in the loop, since it had just failed to get the mutex. But the second time
+in the loop, this would likely succeed, since the task would likely be
+the pending owner.
+
+If the task's state is TASK_INTERRUPTIBLE, a check for signals and timeout
+is done here.
+
+The waiter structure has a "task" field that points to the task that is blocked
+on the mutex. This field can be NULL the first time it goes through the loop
+or if the task is a pending owner and had its mutex stolen. If the "task"
+field is NULL then we need to set up the accounting for it.
+
+Task blocks on mutex
+--------------------
+
+The accounting of a mutex and process is done with the waiter structure of
+the process. The "task" field is set to the process, and the "lock" field
+to the mutex. The plist nodes are initialized to the process's current
+priority.
+
+Since the wait_lock was taken at the entry of the slow lock, we can safely
+add the waiter to the wait_list. If the current process is the highest
+priority process currently waiting on this mutex, then we remove the
+previous top waiter process (if it exists) from the pi_list of the owner,
+and add the current process to that list. Since the pi_list of the owner
+has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
+should adjust its priority accordingly.
+
+If the owner is also blocked on a lock, and had its pi_list changed
+(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
+and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
+
+Now all locks are released, and if the current process is still blocked on a
+mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
+
+Waking up in the loop
+---------------------
+
+The call to schedule can then wake up for a few reasons:
+
+  1) we were given pending ownership of the mutex.
+  2) we received a signal while TASK_INTERRUPTIBLE.
+  3) we had a timeout while TASK_INTERRUPTIBLE.
+
+In any of these cases, we continue the loop and once again try to grab the
+ownership of the mutex. If we succeed, we exit the loop. On a signal or
+timeout we also exit the loop; otherwise, if the mutex was stolen from us,
+we simply add ourselves back on the lists and go back to sleep.
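Putting the loop together, here is a heavily simplified pseudo-C sketch of
the slow path described above (illustrative only; the real rt_mutex_slowlock
also deals with the timer setup, waiter initialization details and wakeup
bookkeeping):

    /* Illustrative pseudo-code for the slowlock loop, not the actual source. */
    spin_lock(&mutex->wait_lock);

    ret = 0;
    for (;;) {
            if (try_to_take_rt_mutex(mutex))
                    break;                  /* got it (or stole it) */

            if (state == TASK_INTERRUPTIBLE &&
                (signal_pending(current) || timed_out)) {
                    ret = timed_out ? -ETIMEDOUT : -EINTR;
                    break;
            }

            if (!waiter.task)               /* first pass, or ownership stolen */
                    task_blocks_on_rt_mutex(mutex, &waiter);

            spin_unlock(&mutex->wait_lock);
            schedule();                     /* sleep until woken */
            spin_lock(&mutex->wait_lock);
    }

    spin_unlock(&mutex->wait_lock);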
+ +Note: For various reasons, because of timeout and signals, the steal mutex + algorithm needs to be careful. This is because the current process is + still on the wait_list. And because of dynamic changing of priorities, + especially on SCHED_OTHER tasks, the current process can be the + highest priority task on the wait_list. + +Failed to get mutex on Timeout or Signal +---------------------------------------- + +If a timeout or signal occurred, the waiter's "task" field would not be +NULL and the task needs to be taken off the wait_list of the mutex and perhaps +pi_list of the owner. If this process was a high priority process, then +the rt_mutex_adjust_prio_chain needs to be executed again on the owner, +but this time it will be lowering the priorities. + + +Unlocking the Mutex +------------------- + +The unlocking of a mutex also has a fast path for those architectures with +CMPXCHG. Since the taking of a mutex on contention always sets the +"Has Waiters" flag of the mutex's owner, we use this to know if we need to +take the slow path when unlocking the mutex. If the mutex doesn't have any +waiters, the owner field of the mutex would equal the current process and +the mutex can be unlocked by just replacing the owner field with NULL. + +If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available), +the slow unlock path is taken. + +The first thing done in the slow unlock path is to take the wait_lock of the +mutex. This synchronizes the locking and unlocking of the mutex. + +A check is made to see if the mutex has waiters or not. On architectures that +do not have CMPXCHG, this is the location that the owner of the mutex will +determine if a waiter needs to be awoken or not. On architectures that +do have CMPXCHG, that check is done in the fast path, but it is still needed +in the slow path too. If a waiter of a mutex woke up because of a signal +or timeout between the time the owner failed the fast path CMPXCHG check and +the grabbing of the wait_lock, the mutex may not have any waiters, thus the +owner still needs to make this check. If there are no waiters then the mutex +owner field is set to NULL, the wait_lock is released and nothing more is +needed. + +If there are waiters, then we need to wake one up and give that waiter +pending ownership. + +On the wake up code, the pi_lock of the current owner is taken. The top +waiter of the lock is found and removed from the wait_list of the mutex +as well as the pi_list of the current owner. The task field of the new +pending owner's waiter structure is set to NULL, and the owner field of the +mutex is set to the new owner with the "Pending Owner" bit set, as well +as the "Has Waiters" bit if there still are other processes blocked on the +mutex. + +The pi_lock of the previous owner is released, and the new pending owner's +pi_lock is taken. Remember that this is the trick to prevent the race +condition in rt_mutex_adjust_prio_chain from adding itself as a waiter +on the mutex. + +We now clear the "pi_blocked_on" field of the new pending owner, and if +the mutex still has waiters pending, we add the new top waiter to the pi_list +of the pending owner. + +Finally we unlock the pi_lock of the pending owner and wake it up. 
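On architectures with CMPXCHG, the fast/slow split of the unlock path can be
pictured with a short hedged sketch (illustrative names; the release succeeds
on the fast path only if the owner field is exactly the current task, i.e.
no flag bits are set):

    /* Illustrative sketch of the unlock fast path, not the actual source. */
    static inline void rt_mutex_unlock_sketch(struct rt_mutex *lock)
    {
            /* Fails if the "Has Waiters" (or "Pending Owner") bit is set. */
            if (rt_mutex_cmpxchg(lock, current, NULL))
                    return;                 /* no waiters: done */

            /* Wake the top waiter and make it the pending owner. */
            rt_mutex_slowunlock(lock);
    }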
+
+
+Contact
+-------
+
+For updates on this document, please email Steven Rostedt
+
+
+Credits
+-------
+
+Author: Steven Rostedt
+
+Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
+
+Updates
+-------
+
+This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt
new file mode 100644
index 000000000000..243393d882ee
--- /dev/null
+++ b/Documentation/locking/rt-mutex.txt
@@ -0,0 +1,79 @@
+RT-mutex subsystem with PI support
+----------------------------------
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on an rt-mutex itself, it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+a high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter list is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters list. This list too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. [The
+priority enqueueing is handled by "plists", see include/linux/plist.h
+for more details.]
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+rt_mutex->owner holds the task_struct pointer of the owner. Bits 0 and 1
+are used to keep track of the "owner is pending" and "rtmutex has
+waiters" state.
+
+ owner          bit1    bit0
+ NULL           0       0       mutex is free (fast acquire possible)
+ NULL           0       1       invalid state
+ NULL           1       0       Transitional state*
+ NULL           1       1       invalid state
+ taskpointer    0       0       mutex is held (fast release possible)
+ taskpointer    0       1       task is pending owner
+ taskpointer    1       0       mutex is held and has waiters
+ taskpointer    1       1       task is pending owner and mutex has waiters
+
+Pending-ownership handling is a performance optimization:
+pending-ownership is assigned to the first (highest priority) waiter of
+the mutex, when the mutex is released. The thread is woken up and once
+it starts executing it can acquire the mutex. Until the mutex is taken
+by it (bit 0 is cleared) a competing higher priority thread can "steal"
+the mutex which puts the woken up thread back on the waiters list.
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt
new file mode 100644
index 000000000000..ff35e40bdf5b
--- /dev/null
+++ b/Documentation/locking/spinlocks.txt
@@ -0,0 +1,167 @@
+Lesson 1: Spin locks
+
+The most basic primitive for locking is the spinlock.
+
+static DEFINE_SPINLOCK(xxx_lock);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&xxx_lock, flags);
+	... critical section here ..
+	spin_unlock_irqrestore(&xxx_lock, flags);
+
+The above is always safe. It will disable interrupts _locally_, but the
+spinlock itself will guarantee the global lock, so it will guarantee that
+there is only one thread-of-control within the region(s) protected by that
+lock. This works well even on UP, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
+
+   NOTE! Implications of spin_locks for memory are further described in:
+
+     Documentation/memory-barriers.txt
+       (5) LOCK operations.
+       (6) UNLOCK operations.
+
+The above is usually pretty simple (you usually need and want only one
+spinlock for most things - using more than one spinlock can make things a
+lot more complex and even slower and is usually worth it only for
+sequences that you _know_ need to be split up: avoid it at all cost if you
+aren't sure).
+
+This is the only really hard part about spinlocks: once you start
+using spinlocks they tend to expand to areas you might not have noticed
+before, because you have to make sure the spinlocks correctly protect the
+shared data structures _everywhere_ they are used. The spinlocks are most
+easily added to places that are completely independent of other code (for
+example, internal driver data structures that nobody else ever touches).
+
+   NOTE! The spin-lock is safe only when you _also_ use the lock itself
+   to do locking across CPU's, which implies that EVERYTHING that
+   touches a shared variable has to agree about the spinlock they want
+   to use.
+
+----
+
+Lesson 2: reader-writer spinlocks.
+
+If your data accesses have a very natural pattern where you usually tend
+to mostly read from the shared variables, the reader-writer locks
+(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
+readers to be in the same critical region at once, but if somebody wants
+to change the variables it has to get an exclusive write lock.
+
+   NOTE! reader-writer locks require more atomic memory operations than
+   simple spinlocks. Unless the reader critical section is long, you
+   are better off just using spinlocks.
+
+The routines look the same as above:
+
+   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+	unsigned long flags;
+
+	read_lock_irqsave(&xxx_lock, flags);
+	.. critical section that only reads the info ...
+	read_unlock_irqrestore(&xxx_lock, flags);
+
+	write_lock_irqsave(&xxx_lock, flags);
+	.. read and write exclusive access to the info ...
+	write_unlock_irqrestore(&xxx_lock, flags);
+
+The above kind of lock may be useful for complex data structures like
+linked lists, especially searching for entries without changing the list
+itself. The read lock allows many concurrent readers. Anything that
+_changes_ the list will have to get the write lock.
+
+   NOTE! RCU is better for list traversal, but requires careful
+   attention to design detail (see Documentation/RCU/listRCU.txt).
+
+Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
+time need to do any changes (even if you don't do it every time), you have
+to get the write-lock at the very beginning.
+
+   NOTE! We are working hard to remove reader-writer spinlocks in most
+   cases, so please don't add a new one without consensus. (Instead, see
+   Documentation/RCU/rcu.txt for complete information.)
+
+----
+
+Lesson 3: spinlocks revisited.
+
+The single spin-lock primitives above are by no means the only ones. They
+are the safest ones, and the ones that work under all circumstances,
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on x86, but it's an expensive one -
+and on other architectures it can be worse).
+
+If you have a case where you have to protect a data structure across
+several CPU's and you want to use spinlocks you can potentially use
+cheaper versions of the spinlocks. IFF you know that the spinlocks are
+never used in interrupt handlers, you can use the non-irq versions:
+
+	spin_lock(&lock);
+	...
+	spin_unlock(&lock);
+
+(and the equivalent read-write versions too, of course). The spinlock will
+guarantee the same kind of exclusive access, and it will be much faster.
+This is useful if you know that the data in question is only ever
+manipulated from a "process context", ie no interrupts involved.
+
+The reason you mustn't use these versions if you have interrupts that
+play with the spinlock is that you can get deadlocks:
+
+	spin_lock(&lock);
+	...
+		<- interrupt comes in:
+			spin_lock(&lock);
+
+where an interrupt tries to lock an already locked variable. This is ok if
+the interrupt happens on another CPU, but it is _not_ ok if the
+interrupt happens on the same CPU that already holds the lock, because the
+lock will obviously never be released (because the interrupt is waiting
+for the lock, and the lock-holder is interrupted by the interrupt and will
+not continue until the interrupt has been processed).
+
+(This is also the reason why the irq-versions of the spinlocks only need
+to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
+on other CPU's, because an interrupt on another CPU doesn't interrupt the
+CPU that holds the lock, so the lock-holder can continue and eventually
+release the lock).
+
+Note that you can be clever with read-write locks and interrupts. For
+example, if you know that the interrupt only ever gets a read-lock, then
+you can use a non-irq version of read locks everywhere - because they
+don't block on each other (and thus there is no dead-lock wrt interrupts).
+But when you do the write-lock, you have to use the irq-safe version.
+
+For an example of being clever with rw-locks, see the "waitqueue_lock"
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
+within an interrupt; interrupts only read the queue in order to know whom to
+wake up. So read-locks are safe (which is good: they are very common
+indeed), while write-locks need to protect themselves against interrupts.
+
+	Linus
+
+----
+
+Reference information:
+
+For dynamic initialization, use spin_lock_init() or rwlock_init() as
+appropriate:
+
+   spinlock_t xxx_lock;
+   rwlock_t xxx_rw_lock;
+
+   static int __init xxx_init(void)
+   {
+	spin_lock_init(&xxx_lock);
+	rwlock_init(&xxx_rw_lock);
+	...
+   }
+
+   module_init(xxx_init);
+
+For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
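+
+As a worked example of Lesson 3's read-write trick, consider a sketch in
+kernel style (illustrative only: the lock, data and function names are
+made up):
+
+	#include <linux/spinlock.h>
+	#include <linux/interrupt.h>
+	#include <linux/printk.h>
+
+	static DEFINE_RWLOCK(stats_lock);
+	static unsigned long stats_count;
+
+	/* The interrupt only ever reads, so the non-irq read lock is
+	 * fine here: readers do not block each other. */
+	static irqreturn_t xxx_irq(int irq, void *dev)
+	{
+		unsigned long snapshot;
+
+		read_lock(&stats_lock);
+		snapshot = stats_count;
+		read_unlock(&stats_lock);
+
+		pr_debug("xxx: count %lu\n", snapshot);
+		return IRQ_HANDLED;
+	}
+
+	/* The writer runs in process context and must use the irq-safe
+	 * version: if the interrupt took the read lock on this CPU
+	 * while we held the write lock, we would deadlock. */
+	static void xxx_update(unsigned long value)
+	{
+		unsigned long flags;
+
+		write_lock_irqsave(&stats_lock, flags);
+		stats_count = value;
+		write_unlock_irqrestore(&stats_lock, flags);
+	}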
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
new file mode 100644
index 000000000000..8a112dc304c3
--- /dev/null
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -0,0 +1,344 @@
+Wait/Wound Deadlock-Proof Mutex Design
+======================================
+
+Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+
+Motivation for WW-Mutexes
+-------------------------
+
+GPUs do operations that commonly involve many buffers. Those buffers
+can be shared across contexts/processes, exist in different memory
+domains (for example VRAM vs system memory), and so on. And with
+PRIME / dmabuf, they can even be shared across devices. So there are
+a handful of situations where the driver needs to wait for buffers to
+become ready. If you think about this in terms of waiting on a buffer
+mutex for it to become available, this presents a problem because
+there is no way to guarantee that buffers appear in an execbuf/batch in
+the same order in all contexts. That is directly under control of
+userspace, and a result of the sequence of GL calls that an application
+makes, which results in the potential for deadlock. The problem gets
+more complex when you consider that the kernel may need to migrate the
+buffer(s) into VRAM before the GPU operates on the buffer(s), which
+may in turn require evicting some other buffers (and you don't want to
+evict other buffers which are already queued up to the GPU), but for a
+simplified understanding of the problem you can ignore this.
+
+The algorithm that the TTM graphics subsystem came up with for dealing with
+this problem is quite simple. For each group of buffers (execbuf) that need
+to be locked, the caller would be assigned a unique reservation id/ticket,
+from a global counter. In case of deadlock while locking all the buffers
+associated with an execbuf, the one with the lowest reservation ticket (i.e.
+the oldest task) wins, and the one with the higher reservation id (i.e. the
+younger task) unlocks all of the buffers that it has already locked, and then
+tries again.
+
+In the RDBMS literature this deadlock handling approach is called wait/wound:
+The older task waits until it can acquire the contended lock. The younger task
+needs to back off and drop all the locks it is currently holding, i.e. the
+younger task is wounded.
+
+Concepts
+--------
+
+Compared to normal mutexes two additional concepts/objects show up in the lock
+interface for w/w mutexes:
+
+Acquire context: To ensure eventual forward progress it is important that a task
+trying to acquire locks doesn't grab a new reservation id, but keeps the one it
+acquired when starting the lock acquisition. This ticket is stored in the
+acquire context. Furthermore the acquire context keeps track of debugging state
+to catch w/w mutex interface abuse.
+
+W/w class: In contrast to normal mutexes the lock class needs to be explicit for
+w/w mutexes, since it is required to initialize the acquire context.
+
+Furthermore there are three different classes of w/w lock acquire functions:
+
+* Normal lock acquisition with a context, using ww_mutex_lock.
+
+* Slowpath lock acquisition on the contending lock, used by the wounded task
+  after having dropped all already acquired locks. These functions have the
+  _slow postfix.
+
+  From a simple semantics point-of-view the _slow functions are not strictly
+  required, since simply calling the normal ww_mutex_lock functions on the
+  contending lock (after having dropped all other already acquired locks) will
+  work correctly. After all if no other ww mutex has been acquired yet there's
+  no deadlock potential and hence the ww_mutex_lock call will block and not
+  prematurely return -EDEADLK. The advantage of the _slow functions is in
+  interface safety:
+  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
+    has a void return type. Note that since ww mutex code needs loops/retries
+    anyway the __must_check doesn't result in spurious warnings, even though the
+    very first lock operation can never fail.
+  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
+    ww mutexes have been released (preventing deadlocks) and makes sure that we
+    block on the contending lock (preventing spinning through the -EDEADLK
+    slowpath until the contended lock can be acquired).
+
+* Functions to only acquire a single w/w mutex, which results in the exact same
+  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
+  context.
+
+  Again this is not strictly required. But often you only want to acquire a
+  single lock in which case it's pointless to set up an acquire context (and so
+  better to avoid grabbing a deadlock avoidance ticket).
+
+Of course, all the usual variants for handling wake-ups due to signals are also
+provided.
+
+Usage
+-----
+
+Three different ways to acquire locks within the same w/w class. Common
+definitions for methods #1 and #2:
+
+static DEFINE_WW_CLASS(ww_class);
+
+struct obj {
+	struct ww_mutex lock;
+	/* obj data */
+};
+
+struct obj_entry {
+	struct list_head head;
+	struct obj *obj;
+};
+
+Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
+This is useful if a list of required objects is already tracked somewhere.
+Furthermore the lock helper can propagate the -EALREADY return code back to
+the caller as a signal that an object appears twice on the list. This is useful
+if the list is constructed from userspace input and the ABI requires userspace
+to not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *res_obj = NULL;
+	struct obj_entry *contended_entry = NULL;
+	struct obj_entry *entry;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	list_for_each_entry (entry, list, head) {
+		if (entry->obj == res_obj) {
+			res_obj = NULL;
+			continue;
+		}
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			contended_entry = entry;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+err:
+	list_for_each_entry_continue_reverse (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	if (res_obj)
+		ww_mutex_unlock(&res_obj->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
+		res_obj = contended_entry->obj;
+		goto retry;
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+}
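+
+As a usage sketch, a hypothetical caller would drive the helper like this
+(the acquire context lives on the caller's stack across the whole
+lock/operate/unlock cycle; unlock_objs is the helper shown after method 2
+below):
+
+int submit_buffers(struct list_head *buffers)
+{
+	struct ww_acquire_ctx ctx;
+	int ret;
+
+	ret = lock_objs(buffers, &ctx);
+	if (ret)
+		return ret;
+
+	/* ... operate on the locked buffers ... */
+
+	unlock_objs(buffers, &ctx);
+	return 0;
+}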
+
+Method 2, using a list in execbuf->buffers that can be reordered. The same
+-EALREADY duplicate entry detection as in method 1 above applies. But the
+list-reordering allows for a bit more idiomatic code.
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry, *entry2;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+	list_for_each_entry (entry, list, head) {
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			entry2 = entry;
+
+			list_for_each_entry_continue_reverse (entry2, list, head)
+				ww_mutex_unlock(&entry2->obj->lock);
+
+			if (ret != -EDEADLK) {
+				ww_acquire_fini(ctx);
+				return ret;
+			}
+
+			/* we lost out in a seqno race, lock and retry.. */
+			ww_mutex_lock_slow(&entry->obj->lock, ctx);
+
+			/*
+			 * Move buf to head of the list, this will point
+			 * buf->next to the first unlocked entry,
+			 * restarting the for loop.
+			 */
+			list_del(&entry->head);
+			list_add(&entry->head, list);
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+}
+
+Unlocking works the same way for both methods #1 and #2:
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry;
+
+	list_for_each_entry (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	ww_acquire_fini(ctx);
+}
+
+Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
+e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
+and edges can only be changed when holding the locks of all involved nodes. w/w
+mutexes are a natural fit for such a case for two reasons:
+- They can handle lock-acquisition in any order, which allows us to start
+  walking a graph from a starting point and then iteratively discovering new
+  edges and locking down the nodes those edges connect to.
+- Due to the -EALREADY return code signalling that a given object is already
+  held, there's no need for additional book-keeping to break cycles in the
+  graph or keep track of which locks are already held (when using more than
+  one node as a starting point).
+
+Note that this approach differs in two important ways from the above methods:
+- Since the list of objects is dynamically constructed (and might very well be
+  different when retrying due to hitting the -EDEADLK wound condition) there's
+  no need to keep any object on a persistent list when it's not locked. We can
+  therefore move the list_head into the object itself.
+- On the other hand the dynamic object list construction also means that the
+  -EALREADY return code can't be propagated.
+
+Note also that methods #1 and #2 can be combined with method #3, e.g. to first
+lock a list of starting nodes (passed in from userspace) using one of the above
+methods, and then lock any additional objects affected by the operations using
+method #3. The backoff/retry procedure will be a bit more involved, since
+when the dynamic locking step hits -EDEADLK we also need to unlock all the
+objects acquired with the fixed list. But the w/w mutex debug checks will catch
+any interface misuse for these cases.
+
+Also, method 3 can't fail the lock acquisition step since it doesn't return
+-EALREADY. Of course this would be different when using the _interruptible
+variants, but that's outside of the scope of these examples here.
+
+struct obj {
+	struct ww_mutex ww_mutex;
+	struct list_head locked_list;
+};
+
+static DEFINE_WW_CLASS(ww_class);
+
+void __unlock_objs(struct list_head *list)
+{
+	struct obj *entry, *temp;
+
+	list_for_each_entry_safe (entry, temp, list, locked_list) {
+		/* need to do that before unlocking, since only the current
+		 * lock holder is allowed to use the object */
+		list_del(&entry->locked_list);
+		ww_mutex_unlock(&entry->ww_mutex);
+	}
+}
+
+void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *obj;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	/* re-init loop start state */
+	for (;;) {
+		/* magic code which walks over a graph, picks the next
+		 * object to lock as obj and breaks out of the loop once
+		 * all involved nodes are locked */
+
+		ret = ww_mutex_lock(&obj->ww_mutex, ctx);
+		if (ret == -EALREADY) {
+			/* we have that one already, get to the next object */
+			continue;
+		}
+		if (ret == -EDEADLK) {
+			__unlock_objs(list);
+
+			ww_mutex_lock_slow(&obj->ww_mutex, ctx);
+			list_add(&obj->locked_list, list);
+			goto retry;
+		}
+
+		/* locked a new object, add it to the list */
+		list_add_tail(&obj->locked_list, list);
+	}
+
+	ww_acquire_done(ctx);
+}
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	__unlock_objs(list);
+	ww_acquire_fini(ctx);
+}
+
+Method 4: Only lock a single object. In that case deadlock detection and
+prevention is obviously overkill, since with grabbing just one lock you can't
+produce a deadlock within just one class. To simplify this case the w/w mutex
+api can be used with a NULL context; see the sketch in the appendix below.
+
+Implementation Details
+----------------------
+
+Design:
+  ww_mutex currently encapsulates a struct mutex, which means no extra overhead
+  for normal mutex locks, which are far more common. As such there is only a
+  small increase in code size if wait/wound mutexes are not used.
+
+  In general, not much contention is expected. The locks are typically used to
+  serialize access to resources for devices. The only way to make wakeups
+  smarter would be at the cost of adding a field to struct mutex_waiter. This
+  would add overhead to all cases where normal mutexes are used, and
+  ww_mutexes are generally less performance sensitive.
+
+Lockdep:
+  Special care has been taken to warn for as many cases of api abuse
+  as possible. Some common api abuses will be caught with
+  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
+
+  Some of the errors which will be warned about:
+   - Forgetting to call ww_acquire_fini or ww_acquire_init.
+   - Attempting to lock more mutexes after ww_acquire_done.
+   - Attempting to lock the wrong mutex after -EDEADLK and
+     unlocking all mutexes.
+   - Attempting to lock the right mutex after -EDEADLK,
+     before unlocking all mutexes.
+
+   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
+
+   - Unlocking mutexes with the wrong unlock function.
+   - Calling one of the ww_acquire_* twice on the same context.
+   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
+   - Normal lockdep errors that can result in deadlocks.
+
+  Some of the lockdep errors that can result in deadlocks:
+   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
+     having called ww_acquire_fini on the first.
+   - 'normal' deadlocks that can occur.
+
+FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
+implemented.
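+
+Appendix: method 4 in code
+--------------------------
+
+A minimal sketch of method 4, reusing the struct obj definition from
+method 3 above (illustrative only; lock_single and unlock_single are
+made-up helper names):
+
+int lock_single(struct obj *obj)
+{
+	/* NULL context: plain mutex semantics, no wait/wound handling */
+	return ww_mutex_lock(&obj->ww_mutex, NULL);
+}
+
+void unlock_single(struct obj *obj)
+{
+	ww_mutex_unlock(&obj->ww_mutex);
+}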
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt deleted file mode 100644 index 72d010689751..000000000000 --- a/Documentation/lockstat.txt +++ /dev/null @@ -1,178 +0,0 @@ - -LOCK STATISTICS - -- WHAT - -As the name suggests, it provides statistics on locks. - -- WHY - -Because things like lock contention can severely impact performance. - -- HOW - -Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that (see Documentation/lockdep-design.txt). -The graph below shows the relation between the lock functions and the various -hooks therein. - - __acquire - | - lock _____ - | \ - | __contended - | | - | - | _______/ - |/ - | - __acquired - | - . - - . - | - __release - | - unlock - -lock, unlock - the regular lock functions -__* - the hooks -<> - states - -With these hooks we provide the following statistics: - - con-bounces - number of lock contention that involved x-cpu data - contentions - number of lock acquisitions that had to wait - wait time min - shortest (non-0) time we ever had to wait for a lock - max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock - avg - average time spent waiting on this lock - acq-bounces - number of lock acquisitions that involved x-cpu data - acquisitions - number of times we took the lock - hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - avg - average time this lock was held - -These numbers are gathered per lock class, per read/write state (when -applicable). - -It also tracks 4 contention points per class. A contention point is a call site -that had to wait on lock acquisition. - - - CONFIGURATION - -Lock statistics are enabled via CONFIG_LOCK_STAT. 
- - - USAGE - -Enable collection of statistics: - -# echo 1 >/proc/sys/kernel/lock_stat - -Disable collection of statistics: - -# echo 0 >/proc/sys/kernel/lock_stat - -Look at the current lock statistics: - -( line numbers not part of actual output, done for clarity in the explanation - below ) - -# less /proc/lock_stat - -01 lock_stat version 0.4 -02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg -04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -05 -06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 -07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 -08 --------------- -09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 -19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 -11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 -12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 -13 --------------- -14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 -15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 -16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 -17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 -18 -19............................................................................................................................................................................................................................. -20 -21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 -22 --------------- -23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 -24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 -25 unix_table_lock 15 [] unix_find_other+0x117/0x230 -26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 -27 --------------- -28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 -29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 -30 unix_table_lock 20 [] unix_find_other+0x117/0x230 -31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 - - -This excerpt shows the first two lock class statistics. Line 01 shows the -output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-18 and 20-31 show the actual -statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 13) from the contention points. - -The first lock (05-18) is a read/write lock, and shows two lines above the -short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. The second set of contention -points are the points we're contending with. - -The integer part of the time values is in us. - -Dealing with nested locks, subclasses may appear: - -32........................................................................................................................................................................................................................... 
-33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 -35 --------- -36 &rq->lock 645 [] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb -40 --------- -41 &rq->lock 77 [] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [] schedule+0x157/0x7b8 -45 -46........................................................................................................................................................................................................................... -47 -48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 -49 ----------- -50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 -51 ----------- -52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a - -Line 48 shows statistics for the second subclass (/1) of &rq->lock class -(subclass starts from 0), since in this case, as line 50 suggests, -double_rq_lock actually acquires a nested lock of two spinlocks. - -View the top contending locks: - -# grep : /proc/lock_stat | head - clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 - tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 - &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 - &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 - &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 - tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 - tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 - rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 - &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 - rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 - -Clear the statistics: - -# echo 0 > /proc/lock_stat diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt deleted file mode 100644 index ee231ed09ec6..000000000000 --- a/Documentation/mutex-design.txt +++ /dev/null @@ -1,157 +0,0 @@ -Generic Mutex Subsystem - -started by Ingo Molnar -updated by Davidlohr Bueso - -What are mutexes? ------------------ - -In the Linux kernel, mutexes refer to a particular locking primitive -that enforces serialization on shared memory systems, and not only to -the generic term referring to 'mutual exclusion' found in academia -or similar theoretical text books. Mutexes are sleeping locks which -behave similarly to binary semaphores, and were introduced in 2006[1] -as an alternative to these. This new data structure provided a number -of advantages, including simpler interfaces, and at that time smaller -code (see Disadvantages). 
-
-[1] http://lwn.net/Articles/164802/
-
-Implementation
---------------
-
-Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-	  1: unlocked
-	  0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
-
-When acquiring a mutex, there are three possible paths that can be
-taken, depending on the state of the lock:
-
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b       lock decl (%rbx)
-    e24:   79 08          jns    e2e <mutex_lock+0x1e>
-
-    the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07       lock incl (%rdi)
-    bcb:   7f 0a          jg     bd7 <mutex_unlock+0x17>
-
-
-(ii) midpath: aka optimistic spinning, tries to spin for acquisition
-     while the lock owner is running and there are no other tasks ready
-     to run that have higher priority (need_resched). The rationale is
-     that if the lock owner is running, it is likely to release the lock
-     soon. The mutex spinners are queued up using MCS lock so that only
-     one spinner can compete for the mutex.
-
-     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
-     with the desirable properties of being fair and with each cpu trying
-     to acquire the lock spinning on a local variable. It avoids expensive
-     cacheline bouncing that common test-and-set spinlock implementations
-     incur. An MCS-like lock is specially tailored for optimistic spinning
-     for sleeping lock implementation. An important feature of the customized
-     MCS lock is that it has the extra property that spinners are able to exit
-     the MCS spinlock queue when they need to reschedule. This further helps
-     avoid situations where MCS spinners that need to reschedule would continue
-     waiting to spin on mutex owner, only to go directly to slowpath upon
-     obtaining the MCS lock.
-
-
-(iii) slowpath: last resort, if the lock is still unable to be acquired,
-      the task is added to the wait-queue and sleeps until woken up by the
-      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
-
-While formally kernel mutexes are sleepable locks, it is path (ii) that
-makes them more practically a hybrid type. By simply not interrupting a
-task and busy-waiting for a few cycles instead of immediately sleeping,
-the performance of this lock has been seen to significantly improve a
-number of workloads. Note that this technique is also used for rw-semaphores.
-
-Semantics
----------
-
-The mutex subsystem checks and enforces the following rules:
-
-    - Only one task can hold the mutex at a time.
-    - Only the owner can unlock the mutex.
-    - Multiple unlocks are not permitted.
-    - Recursive locking/unlocking is not permitted.
-    - A mutex must only be initialized via the API (see below).
-    - A task may not exit with a mutex held.
-    - Memory areas where held locks reside must not be freed.
-    - Held mutexes must not be reinitialized.
- - Mutexes may not be used in hardware or software interrupt - contexts such as tasklets and timers. - -These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled. -In addition, the mutex debugging code also implements a number of other -features that make lock debugging easier and faster: - - - Uses symbolic names of mutexes, whenever they are printed - in debug output. - - Point-of-acquire tracking, symbolic lookup of function names, - list of all locks held in the system, printout of them. - - Owner tracking. - - Detects self-recursing locks and prints out all relevant info. - - Detects multi-task circular deadlocks and prints out all affected - locks and tasks (and only those tasks). - - -Interfaces ----------- -Statically define the mutex: - DEFINE_MUTEX(name); - -Dynamically initialize the mutex: - mutex_init(mutex); - -Acquire the mutex, uninterruptible: - void mutex_lock(struct mutex *lock); - void mutex_lock_nested(struct mutex *lock, unsigned int subclass); - int mutex_trylock(struct mutex *lock); - -Acquire the mutex, interruptible: - int mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); - int mutex_lock_interruptible(struct mutex *lock); - -Acquire the mutex, interruptible, if dec to 0: - int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); - -Unlock the mutex: - void mutex_unlock(struct mutex *lock); - -Test if the mutex is taken: - int mutex_is_locked(struct mutex *lock); - -Disadvantages -------------- - -Unlike its original design and purpose, 'struct mutex' is larger than -most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice -as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the -'struct rw_semaphore' variant. Larger structure sizes mean more CPU -cache and memory footprint. - -When to use mutexes -------------------- - -Unless the strict semantics of mutexes are unsuitable and/or the critical -region prevents the lock from being shared, always prefer them to any other -locking primitive. diff --git a/Documentation/rt-mutex-design.txt b/Documentation/rt-mutex-design.txt deleted file mode 100644 index 8666070d3189..000000000000 --- a/Documentation/rt-mutex-design.txt +++ /dev/null @@ -1,781 +0,0 @@ -# -# Copyright (c) 2006 Steven Rostedt -# Licensed under the GNU Free Documentation License, Version 1.2 -# - -RT-mutex implementation design ------------------------------- - -This document tries to describe the design of the rtmutex.c implementation. -It doesn't describe the reasons why rtmutex.c exists. For that please see -Documentation/rt-mutex.txt. Although this document does explain problems -that happen without this code, but that is in the concept to understand -what the code actually is doing. - -The goal of this document is to help others understand the priority -inheritance (PI) algorithm that is used, as well as reasons for the -decisions that were made to implement PI in the manner that was done. - - -Unbounded Priority Inversion ----------------------------- - -Priority inversion is when a lower priority process executes while a higher -priority process wants to run. This happens for several reasons, and -most of the time it can't be helped. Anytime a high priority process wants -to use a resource that a lower priority process has (a mutex for example), -the high priority process must wait until the lower priority process is done -with the resource. This is a priority inversion. What we want to prevent -is something called unbounded priority inversion. 
That is when the high -priority process is prevented from running by a lower priority process for -an undetermined amount of time. - -The classic example of unbounded priority inversion is where you have three -processes, let's call them processes A, B, and C, where A is the highest -priority process, C is the lowest, and B is in between. A tries to grab a lock -that C owns and must wait and lets C run to release the lock. But in the -meantime, B executes, and since B is of a higher priority than C, it preempts C, -but by doing so, it is in fact preempting A which is a higher priority process. -Now there's no way of knowing how long A will be sleeping waiting for C -to release the lock, because for all we know, B is a CPU hog and will -never give C a chance to release the lock. This is called unbounded priority -inversion. - -Here's a little ASCII art to show the problem. - - grab lock L1 (owned by C) - | -A ---+ - C preempted by B - | -C +----+ - -B +--------> - B now keeps A from running. - - -Priority Inheritance (PI) -------------------------- - -There are several ways to solve this issue, but other ways are out of scope -for this document. Here we only discuss PI. - -PI is where a process inherits the priority of another process if the other -process blocks on a lock owned by the current process. To make this easier -to understand, let's use the previous example, with processes A, B, and C again. - -This time, when A blocks on the lock owned by C, C would inherit the priority -of A. So now if B becomes runnable, it would not preempt C, since C now has -the high priority of A. As soon as C releases the lock, it loses its -inherited priority, and A then can continue with the resource that C had. - -Terminology ------------ - -Here I explain some terminology that is used in this document to help describe -the design that is used to implement PI. - -PI chain - The PI chain is an ordered series of locks and processes that cause - processes to inherit priorities from a previous process that is - blocked on one of its locks. This is described in more detail - later in this document. - -mutex - In this document, to differentiate from locks that implement - PI and spin locks that are used in the PI code, from now on - the PI locks will be called a mutex. - -lock - In this document from now on, I will use the term lock when - referring to spin locks that are used to protect parts of the PI - algorithm. These locks disable preemption for UP (when - CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from - entering critical sections simultaneously. - -spin lock - Same as lock above. - -waiter - A waiter is a struct that is stored on the stack of a blocked - process. Since the scope of the waiter is within the code for - a process being blocked on the mutex, it is fine to allocate - the waiter on the process's stack (local variable). This - structure holds a pointer to the task, as well as the mutex that - the task is blocked on. It also has the plist node structures to - place the task in the waiter_list of a mutex as well as the - pi_list of a mutex owner task (described below). - - waiter is sometimes used in reference to the task that is waiting - on a mutex. This is the same as waiter->task. - -waiters - A list of processes that are blocked on a mutex. - -top waiter - The highest priority process waiting on a specific mutex. - -top pi waiter - The highest priority process waiting on one of the mutexes - that a specific process owns. 
- -Note: task and process are used interchangeably in this document, mostly to - differentiate between two processes that are being described together. - - -PI chain --------- - -The PI chain is a list of processes and mutexes that may cause priority -inheritance to take place. Multiple chains may converge, but a chain -would never diverge, since a process can't be blocked on more than one -mutex at a time. - -Example: - - Process: A, B, C, D, E - Mutexes: L1, L2, L3, L4 - - A owns: L1 - B blocked on L1 - B owns L2 - C blocked on L2 - C owns L3 - D blocked on L3 - D owns L4 - E blocked on L4 - -The chain would be: - - E->L4->D->L3->C->L2->B->L1->A - -To show where two chains merge, we could add another process F and -another mutex L5 where B owns L5 and F is blocked on mutex L5. - -The chain for F would be: - - F->L5->B->L1->A - -Since a process may own more than one mutex, but never be blocked on more than -one, the chains merge. - -Here we show both chains: - - E->L4->D->L3->C->L2-+ - | - +->B->L1->A - | - F->L5-+ - -For PI to work, the processes at the right end of these chains (or we may -also call it the Top of the chain) must be equal to or higher in priority -than the processes to the left or below in the chain. - -Also since a mutex may have more than one process blocked on it, we can -have multiple chains merge at mutexes. If we add another process G that is -blocked on mutex L2: - - G->L2->B->L1->A - -And once again, to show how this can grow I will show the merging chains -again. - - E->L4->D->L3->C-+ - +->L2-+ - | | - G-+ +->B->L1->A - | - F->L5-+ - - -Plist ------ - -Before I go further and talk about how the PI chain is stored through lists -on both mutexes and processes, I'll explain the plist. This is similar to -the struct list_head functionality that is already in the kernel. -The implementation of plist is out of scope for this document, but it is -very important to understand what it does. - -There are a few differences between plist and list, the most important one -being that plist is a priority sorted linked list. This means that the -priorities of the plist are sorted, such that it takes O(1) to retrieve the -highest priority item in the list. Obviously this is useful to store processes -based on their priorities. - -Another difference, which is important for implementation, is that, unlike -list, the head of the list is a different element than the nodes of a list. -So the head of the list is declared as struct plist_head and nodes that will -be added to the list are declared as struct plist_node. - - -Mutex Waiter List ------------------ - -Every mutex keeps track of all the waiters that are blocked on itself. The mutex -has a plist to store these waiters by priority. This list is protected by -a spin lock that is located in the struct of the mutex. This lock is called -wait_lock. Since the modification of the waiter list is never done in -interrupt context, the wait_lock can be taken without disabling interrupts. - - -Task PI List ------------- - -To keep track of the PI chains, each process has its own PI list. This is -a list of all top waiters of the mutexes that are owned by the process. -Note that this list only holds the top waiters and not all waiters that are -blocked on mutexes owned by the process. - -The top of the task's PI list is always the highest priority task that -is waiting on a mutex that is owned by the task. So if the task has -inherited a priority, it will always be the priority of the task that is -at the top of this list. 
- -This list is stored in the task structure of a process as a plist called -pi_list. This list is protected by a spin lock also in the task structure, -called pi_lock. This lock may also be taken in interrupt context, so when -locking the pi_lock, interrupts must be disabled. - - -Depth of the PI Chain ---------------------- - -The maximum depth of the PI chain is not dynamic, and could actually be -defined. But is very complex to figure it out, since it depends on all -the nesting of mutexes. Let's look at the example where we have 3 mutexes, -L1, L2, and L3, and four separate functions func1, func2, func3 and func4. -The following shows a locking order of L1->L2->L3, but may not actually -be directly nested that way. - -void func1(void) -{ - mutex_lock(L1); - - /* do anything */ - - mutex_unlock(L1); -} - -void func2(void) -{ - mutex_lock(L1); - mutex_lock(L2); - - /* do something */ - - mutex_unlock(L2); - mutex_unlock(L1); -} - -void func3(void) -{ - mutex_lock(L2); - mutex_lock(L3); - - /* do something else */ - - mutex_unlock(L3); - mutex_unlock(L2); -} - -void func4(void) -{ - mutex_lock(L3); - - /* do something again */ - - mutex_unlock(L3); -} - -Now we add 4 processes that run each of these functions separately. -Processes A, B, C, and D which run functions func1, func2, func3 and func4 -respectively, and such that D runs first and A last. With D being preempted -in func4 in the "do something again" area, we have a locking that follows: - -D owns L3 - C blocked on L3 - C owns L2 - B blocked on L2 - B owns L1 - A blocked on L1 - -And thus we have the chain A->L1->B->L2->C->L3->D. - -This gives us a PI depth of 4 (four processes), but looking at any of the -functions individually, it seems as though they only have at most a locking -depth of two. So, although the locking depth is defined at compile time, -it still is very difficult to find the possibilities of that depth. - -Now since mutexes can be defined by user-land applications, we don't want a DOS -type of application that nests large amounts of mutexes to create a large -PI chain, and have the code holding spin locks while looking at a large -amount of data. So to prevent this, the implementation not only implements -a maximum lock depth, but also only holds at most two different locks at a -time, as it walks the PI chain. More about this below. - - -Mutex owner and flags ---------------------- - -The mutex structure contains a pointer to the owner of the mutex. If the -mutex is not owned, this owner is set to NULL. Since all architectures -have the task structure on at least a four byte alignment (and if this is -not true, the rtmutex.c code will be broken!), this allows for the two -least significant bits to be used as flags. This part is also described -in Documentation/rt-mutex.txt, but will also be briefly described here. - -Bit 0 is used as the "Pending Owner" flag. This is described later. -Bit 1 is used as the "Has Waiters" flags. This is also described later - in more detail, but is set whenever there are waiters on a mutex. - - -cmpxchg Tricks --------------- - -Some architectures implement an atomic cmpxchg (Compare and Exchange). This -is used (when applicable) to keep the fast path of grabbing and releasing -mutexes short. 
- -cmpxchg is basically the following function performed atomically: - -unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C) -{ - unsigned long T = *A; - if (*A == *B) { - *A = *C; - } - return T; -} -#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c) - -This is really nice to have, since it allows you to only update a variable -if the variable is what you expect it to be. You know if it succeeded if -the return value (the old value of A) is equal to B. - -The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If -the architecture does not support CMPXCHG, then this macro is simply set -to fail every time. But if CMPXCHG is supported, then this will -help out extremely to keep the fast path short. - -The use of rt_mutex_cmpxchg with the flags in the owner field help optimize -the system for architectures that support it. This will also be explained -later in this document. - - -Priority adjustments --------------------- - -The implementation of the PI code in rtmutex.c has several places that a -process must adjust its priority. With the help of the pi_list of a -process this is rather easy to know what needs to be adjusted. - -The functions implementing the task adjustments are rt_mutex_adjust_prio, -__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock -to already be taken), rt_mutex_getprio, and rt_mutex_setprio. - -rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio. - -rt_mutex_getprio returns the priority that the task should have. Either the -task's own normal priority, or if a process of a higher priority is waiting on -a mutex owned by the task, then that higher priority should be returned. -Since the pi_list of a task holds an order by priority list of all the top -waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs -to compare the top pi waiter to its own normal priority, and return the higher -priority back. - -(Note: if looking at the code, you will notice that the lower number of - prio is returned. This is because the prio field in the task structure - is an inverse order of the actual priority. So a "prio" of 5 is - of higher priority than a "prio" of 10.) - -__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the -result does not equal the task's current priority, then rt_mutex_setprio -is called to adjust the priority of the task to the new priority. -Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the -actual change in priority. - -It is interesting to note that __rt_mutex_adjust_prio can either increase -or decrease the priority of the task. In the case that a higher priority -process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio -would increase/boost the task's priority. But if a higher priority task -were for some reason to leave the mutex (timeout or signal), this same function -would decrease/unboost the priority of the task. That is because the pi_list -always contains the highest priority task that is waiting on a mutex owned -by the task, so we only need to compare the priority of that top pi waiter -to the normal priority of the given task. - - -High level overview of the PI chain walk ----------------------------------------- - -The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain. - -The implementation has gone through several iterations, and has ended up -with what we believe is the best. 
It walks the PI chain by only grabbing -at most two locks at a time, and is very efficient. - -The rt_mutex_adjust_prio_chain can be used either to boost or lower process -priorities. - -rt_mutex_adjust_prio_chain is called with a task to be checked for PI -(de)boosting (the owner of a mutex that a process is blocking on), a flag to -check for deadlocking, the mutex that the task owns, and a pointer to a waiter -that is the process's waiter struct that is blocked on the mutex (although this -parameter may be NULL for deboosting). - -For this explanation, I will not mention deadlock detection. This explanation -will try to stay at a high level. - -When this function is called, there are no locks held. That also means -that the state of the owner and lock can change when entered into this function. - -Before this function is called, the task has already had rt_mutex_adjust_prio -performed on it. This means that the task is set to the priority that it -should be at, but the plist nodes of the task's waiter have not been updated -with the new priorities, and that this task may not be in the proper locations -in the pi_lists and wait_lists that the task is blocked on. This function -solves all that. - -A loop is entered, where task is the owner to be checked for PI changes that -was passed by parameter (for the first iteration). The pi_lock of this task is -taken to prevent any more changes to the pi_list of the task. This also -prevents new tasks from completing the blocking on a mutex that is owned by this -task. - -If the task is not blocked on a mutex then the loop is exited. We are at -the top of the PI chain. - -A check is now done to see if the original waiter (the process that is blocked -on the current mutex) is the top pi waiter of the task. That is, is this -waiter on the top of the task's pi_list. If it is not, it either means that -there is another process higher in priority that is blocked on one of the -mutexes that the task owns, or that the waiter has just woken up via a signal -or timeout and has left the PI chain. In either case, the loop is exited, since -we don't need to do any more changes to the priority of the current task, or any -task that owns a mutex that this current task is waiting on. A priority chain -walk is only needed when a new top pi waiter is made to a task. - -The next check sees if the task's waiter plist node has the priority equal to -the priority the task is set at. If they are equal, then we are done with -the loop. Remember that the function started with the priority of the -task adjusted, but the plist nodes that hold the task in other processes -pi_lists have not been adjusted. - -Next, we look at the mutex that the task is blocked on. The mutex's wait_lock -is taken. This is done by a spin_trylock, because the locking order of the -pi_lock and wait_lock goes in the opposite direction. If we fail to grab the -lock, the pi_lock is released, and we restart the loop. - -Now that we have both the pi_lock of the task as well as the wait_lock of -the mutex the task is blocked on, we update the task's waiter's plist node -that is located on the mutex's wait_list. - -Now we release the pi_lock of the task. - -Next the owner of the mutex has its pi_lock taken, so we can update the -task's entry in the owner's pi_list. If the task is the highest priority -process on the mutex's wait_list, then we remove the previous top waiter -from the owner's pi_list, and replace it with the task. 
- -Note: It is possible that the task was the current top waiter on the mutex, - in which case the task is not yet on the pi_list of the waiter. This - is OK, since plist_del does nothing if the plist node is not on any - list. - -If the task was not the top waiter of the mutex, but it was before we -did the priority updates, that means we are deboosting/lowering the -task. In this case, the task is removed from the pi_list of the owner, -and the new top waiter is added. - -Lastly, we unlock both the pi_lock of the task, as well as the mutex's -wait_lock, and continue the loop again. On the next iteration of the -loop, the previous owner of the mutex will be the task that will be -processed. - -Note: One might think that the owner of this mutex might have changed - since we just grab the mutex's wait_lock. And one could be right. - The important thing to remember is that the owner could not have - become the task that is being processed in the PI chain, since - we have taken that task's pi_lock at the beginning of the loop. - So as long as there is an owner of this mutex that is not the same - process as the tasked being worked on, we are OK. - - Looking closely at the code, one might be confused. The check for the - end of the PI chain is when the task isn't blocked on anything or the - task's waiter structure "task" element is NULL. This check is - protected only by the task's pi_lock. But the code to unlock the mutex - sets the task's waiter structure "task" element to NULL with only - the protection of the mutex's wait_lock, which was not taken yet. - Isn't this a race condition if the task becomes the new owner? - - The answer is No! The trick is the spin_trylock of the mutex's - wait_lock. If we fail that lock, we release the pi_lock of the - task and continue the loop, doing the end of PI chain check again. - - In the code to release the lock, the wait_lock of the mutex is held - the entire time, and it is not let go when we grab the pi_lock of the - new owner of the mutex. So if the switch of a new owner were to happen - after the check for end of the PI chain and the grabbing of the - wait_lock, the unlocking code would spin on the new owner's pi_lock - but never give up the wait_lock. So the PI chain loop is guaranteed to - fail the spin_trylock on the wait_lock, release the pi_lock, and - try again. - - If you don't quite understand the above, that's OK. You don't have to, - unless you really want to make a proof out of it ;) - - -Pending Owners and Lock stealing --------------------------------- - -One of the flags in the owner field of the mutex structure is "Pending Owner". -What this means is that an owner was chosen by the process releasing the -mutex, but that owner has yet to wake up and actually take the mutex. - -Why is this important? Why can't we just give the mutex to another process -and be done with it? - -The PI code is to help with real-time processes, and to let the highest -priority process run as long as possible with little latencies and delays. -If a high priority process owns a mutex that a lower priority process is -blocked on, when the mutex is released it would be given to the lower priority -process. What if the higher priority process wants to take that mutex again. -The high priority process would fail to take that mutex that it just gave up -and it would need to boost the lower priority process to run with full -latency of that critical section (since the low priority process just entered -it). 
-
-There's no reason a high priority process that gives up a mutex should be
-penalized if it tries to take that mutex again. If the new owner of the
-mutex has not woken up yet, there's no reason that the higher priority process
-could not take that mutex away.
-
-To solve this, we introduced Pending Ownership and Lock Stealing. When a
-new process is given a mutex that it was blocked on, it is only given
-pending ownership. This means that it's the new owner, unless a higher
-priority process comes in and tries to grab that mutex. If a higher priority
-process does come along and wants that mutex, we let the higher priority
-process "steal" the mutex from the pending owner (only if it is still pending)
-and continue with the mutex.
-
-
-Taking of a mutex (The walk through)
-------------------------------------
-
-OK, now let's take a look at the detailed walk through of what happens when
-taking a mutex.
-
-The first thing that is tried is the fast taking of the mutex. This is
-done when we have CMPXCHG enabled (otherwise the fast taking automatically
-fails). Only when the owner field of the mutex is NULL can the lock be
-taken with the CMPXCHG and nothing else needs to be done.
-
-If there is contention on the lock, whether it has a real owner or only a
-pending owner, we take the slow path (rt_mutex_slowlock).
-
-The slow path function is where the task's waiter structure is created on
-the stack. This is because the waiter structure is only needed for the
-scope of this function. The waiter structure holds the nodes to store
-the task on the wait_list of the mutex, and if need be, the pi_list of
-the owner.
-
-The wait_lock of the mutex is taken since the slow path of unlocking the
-mutex also takes this lock.
-
-We then call try_to_take_rt_mutex. This is where an architecture that
-does not implement CMPXCHG would always grab the lock (if there's no
-contention).
-
-try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
-slow path. The first thing that is done here is an atomic setting of
-the "Has Waiters" flag of the mutex's owner field. Yes, this could really
-be false, because if the mutex has no owner, there are no waiters and
-the current task also won't have any waiters. But we don't have the lock
-yet, so we assume we are going to be a waiter. The reason for this is to
-play nice for those architectures that do have CMPXCHG. By setting this flag
-now, the owner of the mutex can't release the mutex without going into the
-slow unlock path, and it would then need to grab the wait_lock, which this
-code currently holds. So setting the "Has Waiters" flag forces the owner
-to synchronize with this code.
-
-Now that we know that we can't have any races with the owner releasing the
-mutex, we check to see if we can take the ownership. This is done if the
-mutex doesn't have an owner, or if we can steal the mutex from a pending
-owner. Let's look at the situations we have here.
-
-  1) Has owner that is pending
-  ----------------------------
-
-  The mutex has an owner, but that owner hasn't woken up and the mutex flag
-  "Pending Owner" is set. The first check is to see if the owner isn't the
-  current task. This is because this function is also used for the pending
-  owner to grab the mutex. When a pending owner wakes up, it checks to see
-  if it can take the mutex, and this is done if the owner is already set to
-  itself. If so, we succeed and leave the function, clearing the "Pending
-  Owner" bit.
-
-  If the pending owner is not current, we check to see if the current
-  process's priority is higher than the pending owner's. If not, we fail
-  the function and return.
-
-  There's also something special about a pending owner. That is, a pending
-  owner is never blocked on a mutex. So there is no PI chain to worry about.
-  It also means that if the mutex doesn't have any waiters, there's no
-  accounting needed to update the pending owner's pi_list, since we only
-  worry about processes blocked on the current mutex.
-
-  If there are waiters on this mutex, and we just stole the ownership, we need
-  to take the top waiter, remove it from the pi_list of the pending owner, and
-  add it to the current process's pi_list. Note that at this moment, the
-  pending owner is no longer on the list of waiters. This is fine, since the
-  pending owner would add itself back when it realizes that it had the
-  ownership stolen from it. When the pending owner tries to grab the mutex,
-  it will fail in try_to_take_rt_mutex if the owner field points to another
-  process.
-
-  2) No owner
-  -----------
-
-  If there is no owner (or we successfully stole the lock), we set the owner
-  of the mutex to current, and set the flag of "Has Waiters" if the current
-  mutex actually has waiters, or we clear the flag if it doesn't. See, it was
-  OK that we set that flag early, since now it is cleared.
-
-  3) Failed to grab ownership
-  ---------------------------
-
-  The most interesting case is when we fail to take ownership. This means that
-  there exists an owner, or there's a pending owner with equal or higher
-  priority than the current task.
-
-We'll continue on the failed case.
-
-If the mutex has a timeout, we set up a timer to go off to break us out
-of this mutex if we failed to get it after a specified amount of time.
-
-Now we enter a loop that will continue to try to take ownership of the mutex,
-or fail from a timeout or signal.
-
-Once again we try to take the mutex. This will usually fail the first time
-in the loop, since it had just failed to get the mutex. But the second time
-in the loop, this would likely succeed, since the task would likely be
-the pending owner.
-
-If the task's state is TASK_INTERRUPTIBLE, a check for signals and timeout
-is done here.
-
-The waiter structure has a "task" field that points to the task that is blocked
-on the mutex. This field can be NULL the first time it goes through the loop
-or if the task is a pending owner and had its mutex stolen. If the "task"
-field is NULL then we need to set up the accounting for it.
-
-Task blocks on mutex
---------------------
-
-The accounting of a mutex and process is done with the waiter structure of
-the process. The "task" field is set to the process, and the "lock" field
-to the mutex. The plist nodes are initialized to the process's current
-priority.
-
-Since the wait_lock was taken at the entry of the slow lock, we can safely
-add the waiter to the wait_list. If the current process is the highest
-priority process currently waiting on this mutex, then we remove the
-previous top waiter process (if it exists) from the pi_list of the owner,
-and add the current process to that list. Since the pi_list of the owner
-has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
-should adjust its priority accordingly. A sketch of this accounting is
-shown below.
-
-If the owner is also blocked on a lock, and had its pi_list changed
-(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
-and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
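A condensed sketch of that accounting, assuming it is entered with the
mutex's wait_lock already held. The names approximate the real
task_blocks_on_rt_mutex(), and prev_top_waiter is a made-up name for the
waiter that was on top before we were queued:

	waiter->task = current;
	waiter->lock = lock;
	plist_node_init(&waiter->list_entry, current->prio);
	plist_node_init(&waiter->pi_list_entry, current->prio);

	spin_lock(&current->pi_lock);
	current->pi_blocked_on = waiter;
	plist_add(&waiter->list_entry, &lock->wait_list);
	spin_unlock(&current->pi_lock);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* we displaced the previous top waiter */
		struct task_struct *owner = rt_mutex_owner(lock);

		spin_lock(&owner->pi_lock);
		plist_del(&prev_top_waiter->pi_list_entry, &owner->pi_waiters);
		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
		/* recompute the owner's priority (rt_mutex_adjust_prio) */
		__rt_mutex_adjust_prio(owner);
		spin_unlock(&owner->pi_lock);
	}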
-
-Now all locks are released, and if the current process is still blocked on a
-mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
-
-Waking up in the loop
----------------------
-
-The call to schedule can then wake up for a few reasons:
-  1) we were given pending ownership of the mutex.
-  2) we received a signal and were TASK_INTERRUPTIBLE
-  3) we had a timeout and were TASK_INTERRUPTIBLE
-
-In any of these cases, we continue the loop and once again try to grab the
-ownership of the mutex. If we succeed, we exit the loop. Otherwise, a signal
-or timeout will exit the loop, while if we had the mutex stolen we simply add
-ourselves back on the lists and go back to sleep.
-
-Note: For various reasons, because of timeout and signals, the steal mutex
-      algorithm needs to be careful. This is because the current process is
-      still on the wait_list. And because of dynamic changing of priorities,
-      especially on SCHED_OTHER tasks, the current process can be the
-      highest priority task on the wait_list.
-
-Failed to get mutex on Timeout or Signal
-----------------------------------------
-
-If a timeout or signal occurred, the waiter's "task" field would not be
-NULL and the task needs to be taken off the wait_list of the mutex and perhaps
-the pi_list of the owner. If this process was a high priority process, then
-the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
-but this time it will be lowering the priorities.
-
-
-Unlocking the Mutex
--------------------
-
-The unlocking of a mutex also has a fast path for those architectures with
-CMPXCHG. Since the taking of a mutex on contention always sets the
-"Has Waiters" flag of the mutex's owner field, we use this to know if we need
-to take the slow path when unlocking the mutex. If the mutex doesn't have any
-waiters, the owner field of the mutex would equal the current process and
-the mutex can be unlocked by just replacing the owner field with NULL.
-
-If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
-the slow unlock path is taken.
-
-The first thing done in the slow unlock path is to take the wait_lock of the
-mutex. This synchronizes the locking and unlocking of the mutex.
-
-A check is made to see if the mutex has waiters or not. On architectures that
-do not have CMPXCHG, this is the location where the owner of the mutex will
-determine if a waiter needs to be awoken or not. On architectures that
-do have CMPXCHG, that check is done in the fast path, but it is still needed
-in the slow path too. If a waiter of a mutex woke up because of a signal
-or timeout between the time the owner failed the fast path CMPXCHG check and
-the grabbing of the wait_lock, the mutex may not have any waiters, thus the
-owner still needs to make this check. If there are no waiters then the mutex
-owner field is set to NULL, the wait_lock is released and nothing more is
-needed.
-
-If there are waiters, then we need to wake one up and give that waiter
-pending ownership. A sketch of this hand-over is shown below.
-
-In the wake up code, the pi_lock of the current owner is taken. The top
-waiter of the lock is found and removed from the wait_list of the mutex
-as well as the pi_list of the current owner. The task field of the new
-pending owner's waiter structure is set to NULL, and the owner field of the
-mutex is set to the new owner with the "Pending Owner" bit set, as well
-as the "Has Waiters" bit if there still are other processes blocked on the
-mutex.
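A simplified sketch of that hand-over, assuming it is entered with the
mutex's wait_lock held; RT_MUTEX_OWNER_PENDING and RT_MUTEX_HAS_WAITERS are
stand-ins for the two owner-field bits, and the helper names approximate the
real slow unlock code:

	spin_lock(&current->pi_lock);	/* current == releasing owner */

	waiter = rt_mutex_top_waiter(lock);
	pendowner = waiter->task;

	plist_del(&waiter->list_entry, &lock->wait_list);
	plist_del(&waiter->pi_list_entry, &current->pi_waiters);
	waiter->task = NULL;	/* marks the waiter as the pending owner */

	/* hand the lock over with the "Pending Owner" bit set, keeping
	 * "Has Waiters" if other tasks are still blocked on the mutex */
	val = (unsigned long)pendowner | RT_MUTEX_OWNER_PENDING;
	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;
	lock->owner = (struct task_struct *)val;

	/* the previous owner's pi_lock is dropped in the next step */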
-
-The pi_lock of the previous owner is released, and the new pending owner's
-pi_lock is taken. Remember that this is the trick to prevent the race
-condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
-on the mutex.
-
-We now clear the "pi_blocked_on" field of the new pending owner, and if
-the mutex still has waiters pending, we add the new top waiter to the pi_list
-of the pending owner.
-
-Finally we unlock the pi_lock of the pending owner and wake it up.
-
-
-Contact
--------
-
-For updates on this document, please email Steven Rostedt
-
-
-Credits
--------
-
-Author: Steven Rostedt
-
-Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
-
-Updates
--------
-
-This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/rt-mutex.txt b/Documentation/rt-mutex.txt
deleted file mode 100644
index 243393d882ee..000000000000
--- a/Documentation/rt-mutex.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-RT-mutex subsystem with PI support
-----------------------------------
-
-RT-mutexes with priority inheritance are used to support PI-futexes,
-which enable pthread_mutex_t priority inheritance attributes
-(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
-about PI-futexes.]
-
-This technology was developed in the -rt tree and streamlined for
-pthread_mutex support.
-
-Basic principles:
------------------
-
-RT-mutexes extend the semantics of simple mutexes by the priority
-inheritance protocol.
-
-A low priority owner of a rt-mutex inherits the priority of a higher
-priority waiter until the rt-mutex is released. If the temporarily
-boosted owner blocks on a rt-mutex itself it propagates the priority
-boosting to the owner of the other rt_mutex it gets blocked on. The
-priority boosting is immediately removed once the rt_mutex has been
-unlocked.
-
-This approach allows us to shorten the time high-prio tasks block on
-mutexes which protect shared resources. Priority inheritance is not a
-magic bullet for poorly designed applications, but it allows
-well-designed applications to use userspace locks in critical parts of
-a high priority thread, without losing determinism.
-
-The enqueueing of the waiters into the rtmutex waiter list is done in
-priority order. For same priorities FIFO order is chosen. For each
-rtmutex, only the top priority waiter is enqueued into the owner's
-priority waiters list. This list too queues in priority order. Whenever
-the top priority waiter of a task changes (for example it timed out or
-got a signal), the priority of the owner task is readjusted. [The
-priority enqueueing is handled by "plists", see include/linux/plist.h
-for more details.]
-
-RT-mutexes are optimized for fastpath operations and have no internal
-locking overhead when locking an uncontended mutex or unlocking a mutex
-without waiters. The optimized fastpath operations require cmpxchg
-support. [If that is not available then the rt-mutex internal spinlock
-is used]
-
-The state of the rt-mutex is tracked via the owner field of the rt-mutex
-structure:
-
-rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
-are used to keep track of the "owner is pending" and "rtmutex has
-waiters" state.
-
-          owner        bit1    bit0
-          NULL         0       0       mutex is free (fast acquire possible)
-          NULL         0       1       invalid state
-          NULL         1       0       Transitional state*
-          NULL         1       1       invalid state
-          taskpointer  0       0       mutex is held (fast release possible)
-          taskpointer  0       1       task is pending owner
-          taskpointer  1       0       mutex is held and has waiters
-          taskpointer  1       1       task is pending owner and mutex has waiters
-
-Pending-ownership handling is a performance optimization:
-pending-ownership is assigned to the first (highest priority) waiter of
-the mutex, when the mutex is released. The thread is woken up and once
-it starts executing it can acquire the mutex. Until the mutex is taken
-by it (bit 0 is cleared) a competing higher priority thread can "steal"
-the mutex which puts the woken up thread back on the waiters list.
-
-The pending-ownership optimization is especially important for the
-uninterrupted workflow of high-prio tasks which repeatedly
-take/release locks that have lower-prio waiters. Without this
-optimization the higher-prio thread would ping-pong to the lower-prio
-task [because at unlock time we always assign a new owner].
-
-(*) The "mutex has waiters" bit gets set to take the lock. If the lock
-doesn't already have an owner, this bit is quickly cleared if there are
-no waiters. So this is a transitional state to synchronize with looking
-at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
deleted file mode 100644
index 97eaf5727178..000000000000
--- a/Documentation/spinlocks.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-Lesson 1: Spin locks
-
-The most basic primitive for locking is the spinlock.
-
-static DEFINE_SPINLOCK(xxx_lock);
-
-	unsigned long flags;
-
-	spin_lock_irqsave(&xxx_lock, flags);
-	... critical section here ..
-	spin_unlock_irqrestore(&xxx_lock, flags);
-
-The above is always safe. It will disable interrupts _locally_, but the
-spinlock itself will guarantee the global lock, so it will guarantee that
-there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP, so the code does _not_ need to
-worry about UP vs SMP issues: the spinlocks work correctly under both.
-
-   NOTE! Implications of spin_locks for memory are further described in:
-
-     Documentation/memory-barriers.txt
-       (5) LOCK operations.
-       (6) UNLOCK operations.
-
-The above is usually pretty simple (you usually need and want only one
-spinlock for most things - using more than one spinlock can make things a
-lot more complex and even slower and is usually worth it only for
-sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure).
-
-This is really the only hard part about spinlocks: once you start
-using spinlocks they tend to expand to areas you might not have noticed
-before, because you have to make sure the spinlocks correctly protect the
-shared data structures _everywhere_ they are used. The spinlocks are most
-easily added to places that are completely independent of other code (for
-example, internal driver data structures that nobody else ever touches).
-
-   NOTE! The spin-lock is safe only when you _also_ use the lock itself
-   to do locking across CPU's, which implies that EVERYTHING that
-   touches a shared variable has to agree about the spinlock they want
-   to use.
-
-----
-
-Lesson 2: reader-writer spinlocks.
-
-If your data accesses have a very natural pattern where you usually tend
-to mostly read from the shared variables, the reader-writer lock
-(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
-readers to be in the same critical region at once, but if somebody wants
-to change the variables they have to get an exclusive write lock.
-
-   NOTE! reader-writer locks require more atomic memory operations than
-   simple spinlocks. Unless the reader critical section is long, you
-   are better off just using spinlocks.
-
-The routines look the same as above:
-
-   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
-
-	unsigned long flags;
-
-	read_lock_irqsave(&xxx_lock, flags);
-	.. critical section that only reads the info ...
-	read_unlock_irqrestore(&xxx_lock, flags);
-
-	write_lock_irqsave(&xxx_lock, flags);
-	.. read and write exclusive access to the info ...
-	write_unlock_irqrestore(&xxx_lock, flags);
-
-The above kind of lock may be useful for complex data structures like
-linked lists, especially searching for entries without changing the list
-itself. The read lock allows many concurrent readers. Anything that
-_changes_ the list will have to get the write lock.
-
-   NOTE! RCU is better for list traversal, but requires careful
-   attention to design detail (see Documentation/RCU/listRCU.txt).
-
-Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
-time need to do any changes (even if you don't do it every time), you have
-to get the write-lock at the very beginning.
-
-   NOTE! We are working hard to remove reader-writer spinlocks in most
-   cases, so please don't add a new one without consensus. (Instead, see
-   Documentation/RCU/rcu.txt for complete information.)
-
-----
-
-Lesson 3: spinlocks revisited.
-
-The single spin-lock primitives above are by no means the only ones. They
-are the safest ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are slower
-than they'd need to be, because they do have to disable interrupts
-(which is just a single instruction on x86, but it's an expensive one -
-and on other architectures it can be worse).
-
-If you have a case where you have to protect a data structure across
-several CPU's and you want to use spinlocks you can potentially use
-cheaper versions of the spinlocks. IFF you know that the spinlocks are
-never used in interrupt handlers, you can use the non-irq versions:
-
-	spin_lock(&lock);
-	...
-	spin_unlock(&lock);
-
-(and the equivalent read-write versions too, of course). The spinlock will
-guarantee the same kind of exclusive access, and it will be much faster.
-This is useful if you know that the data in question is only ever
-manipulated from a "process context", ie no interrupts involved.
-
-The reason you mustn't use these versions if you have interrupts that
-play with the spinlock is that you can get deadlocks:
-
-	spin_lock(&lock);
-	...
-		<- interrupt comes in:
-			spin_lock(&lock);
-
-where an interrupt tries to lock an already locked variable. This is ok if
-the interrupt happens on another CPU, but it is _not_ ok if the
-interrupt happens on the same CPU that already holds the lock, because the
-lock will obviously never be released (because the interrupt is waiting
-for the lock, and the lock-holder is interrupted by the interrupt and will
-not continue until the interrupt has been processed). A short sketch of
-the safe pattern follows.
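As a minimal sketch of the safe pattern - assuming a hypothetical driver
whose interrupt handler and process-context code share one piece of data;
the names xxx_irq and xxx_do_work are made up for the example:

	#include <linux/spinlock.h>
	#include <linux/interrupt.h>

	static DEFINE_SPINLOCK(xxx_lock);

	static irqreturn_t xxx_irq(int irq, void *dev_id)
	{
		spin_lock(&xxx_lock);	/* interrupts already off here */
		/* ... touch the shared data ... */
		spin_unlock(&xxx_lock);
		return IRQ_HANDLED;
	}

	static void xxx_do_work(void)
	{
		unsigned long flags;

		/* process context: disable local interrupts, or xxx_irq
		 * could come in while we hold the lock and deadlock */
		spin_lock_irqsave(&xxx_lock, flags);
		/* ... touch the same shared data ... */
		spin_unlock_irqrestore(&xxx_lock, flags);
	}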
-
-(This is also the reason why the irq-versions of the spinlocks only need
-to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
-on other CPU's, because an interrupt on another CPU doesn't interrupt the
-CPU that holds the lock, so the lock-holder can continue and eventually
-release the lock).
-
-Note that you can be clever with read-write locks and interrupts. For
-example, if you know that the interrupt only ever gets a read-lock, then
-you can use a non-irq version of read locks everywhere - because they
-don't block on each other (and thus there is no dead-lock wrt interrupts).
-But when you do the write-lock, you have to use the irq-safe version.
-
-For an example of being clever with rw-locks, see the "waitqueue_lock"
-handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
-within an interrupt, they only read the queue in order to know whom to
-wake up. So read-locks are safe (which is good: they are very common
-indeed), while write-locks need to protect themselves against interrupts.
-
-		Linus
-
-----
-
-Reference information:
-
-For dynamic initialization, use spin_lock_init() or rwlock_init() as
-appropriate:
-
-   spinlock_t xxx_lock;
-   rwlock_t xxx_rw_lock;
-
-   static int __init xxx_init(void)
-   {
-	spin_lock_init(&xxx_lock);
-	rwlock_init(&xxx_rw_lock);
-	...
-   }
-
-   module_init(xxx_init);
-
-For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
-__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/ww-mutex-design.txt b/Documentation/ww-mutex-design.txt
deleted file mode 100644
index 8a112dc304c3..000000000000
--- a/Documentation/ww-mutex-design.txt
+++ /dev/null
@@ -1,344 +0,0 @@
-Wait/Wound Deadlock-Proof Mutex Design
-======================================
-
-Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
-
-Motivation for WW-Mutexes
--------------------------
-
-GPU's do operations that commonly involve many buffers. Those buffers
-can be shared across contexts/processes, exist in different memory
-domains (for example VRAM vs system memory), and so on. And with
-PRIME / dmabuf, they can even be shared across devices. So there are
-a handful of situations where the driver needs to wait for buffers to
-become ready. If you think about this in terms of waiting on a buffer
-mutex for it to become available, this presents a problem because
-there is no way to guarantee that buffers appear in an execbuf/batch in
-the same order in all contexts. That is directly under control of
-userspace, and a result of the sequence of GL calls that an application
-makes, which results in the potential for deadlock. The problem gets
-more complex when you consider that the kernel may need to migrate the
-buffer(s) into VRAM before the GPU operates on the buffer(s), which
-may in turn require evicting some other buffers (and you don't want to
-evict other buffers which are already queued up to the GPU), but for a
-simplified understanding of the problem you can ignore this.
-
-The algorithm that the TTM graphics subsystem came up with for dealing with
-this problem is quite simple. For each group of buffers (execbuf) that need
-to be locked, the caller would be assigned a unique reservation id/ticket,
-from a global counter. In case of deadlock while locking all the buffers
-associated with an execbuf, the one with the lowest reservation ticket (i.e.
the
-younger task) unlocks all of the buffers that it has already locked, and then
-tries again.
-
-In the RDBMS literature this deadlock handling approach is called wait/wound:
-The older task waits until it can acquire the contended lock. The younger task
-needs to back off and drop all the locks it is currently holding, i.e. the
-younger task is wounded.
-
-Concepts
---------
-
-Compared to normal mutexes two additional concepts/objects show up in the lock
-interface for w/w mutexes:
-
-Acquire context: To ensure eventual forward progress it is important that a
-task trying to acquire locks doesn't grab a new reservation id, but keeps the
-one it acquired when starting the lock acquisition. This ticket is stored in
-the acquire context. Furthermore the acquire context keeps track of debugging
-state to catch w/w mutex interface abuse.
-
-W/w class: In contrast to normal mutexes the lock class needs to be explicit for
-w/w mutexes, since it is required to initialize the acquire context.
-
-Furthermore there are three different classes of w/w lock acquire functions:
-
-* Normal lock acquisition with a context, using ww_mutex_lock.
-
-* Slowpath lock acquisition on the contending lock, used by the wounded task
-  after having dropped all already acquired locks. These functions have the
-  _slow postfix.
-
-  From a simple semantics point-of-view the _slow functions are not strictly
-  required, since simply calling the normal ww_mutex_lock functions on the
-  contending lock (after having dropped all other already acquired locks) will
-  work correctly. After all if no other ww mutex has been acquired yet there's
-  no deadlock potential and hence the ww_mutex_lock call will block and not
-  prematurely return -EDEADLK. The advantage of the _slow functions is in
-  interface safety:
-
-  ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
-  has a void return type. Note that since ww mutex code needs loops/retries
-  anyway the __must_check doesn't result in spurious warnings, even though the
-  very first lock operation can never fail.
-
-  When full debugging is enabled ww_mutex_lock_slow checks that all acquired
-  ww mutex have been released (preventing deadlocks) and makes sure that we
-  block on the contending lock (preventing spinning through the -EDEADLK
-  slowpath until the contended lock can be acquired).
-
-* Functions to only acquire a single w/w mutex, which results in the exact same
-  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
-  context.
-
-  Again this is not strictly required. But often you only want to acquire a
-  single lock in which case it's pointless to set up an acquire context (and so
-  better to avoid grabbing a deadlock avoidance ticket).
-
-Of course, all the usual variants for handling wake-ups due to signals are also
-provided.
-
-Usage
------
-
-Three different ways to acquire locks within the same w/w class. Common
-definitions for methods #1 and #2:
-
-static DEFINE_WW_CLASS(ww_class);
-
-struct obj {
-	struct ww_mutex lock;
-	/* obj data */
-};
-
-struct obj_entry {
-	struct list_head head;
-	struct obj *obj;
-};
-
-Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
-This is useful if a list of required objects is already tracked somewhere.
-Furthermore the lock helper can propagate the -EALREADY return code back to
-the caller as a signal that an object is twice on the list.
This is useful if
-the list is constructed from userspace input and the ABI requires userspace to
-not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *res_obj = NULL;
-	struct obj_entry *contended_entry = NULL;
-	struct obj_entry *entry;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	list_for_each_entry (entry, list, head) {
-		if (entry->obj == res_obj) {
-			res_obj = NULL;
-			continue;
-		}
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			contended_entry = entry;
-			goto err;
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-
-err:
-	list_for_each_entry_continue_reverse (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	if (res_obj)
-		ww_mutex_unlock(&res_obj->lock);
-
-	if (ret == -EDEADLK) {
-		/* we lost out in a seqno race, lock and retry.. */
-		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
-		res_obj = contended_entry->obj;
-		goto retry;
-	}
-	ww_acquire_fini(ctx);
-
-	return ret;
-}
-
-Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
-of duplicate entry detection using -EALREADY as method 1 above. But the
-list-reordering allows for a bit more idiomatic code.
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry, *entry2;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-	list_for_each_entry (entry, list, head) {
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			entry2 = entry;
-
-			list_for_each_entry_continue_reverse (entry2, list, head)
-				ww_mutex_unlock(&entry2->obj->lock);
-
-			if (ret != -EDEADLK) {
-				ww_acquire_fini(ctx);
-				return ret;
-			}
-
-			/* we lost out in a seqno race, lock and retry.. */
-			ww_mutex_lock_slow(&entry->obj->lock, ctx);
-
-			/*
-			 * Move buf to head of the list, this will point
-			 * buf->next to the first unlocked entry,
-			 * restarting the for loop.
-			 */
-			list_del(&entry->head);
-			list_add(&entry->head, list);
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-Unlocking works the same way for both methods #1 and #2:
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry;
-
-	list_for_each_entry (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	ww_acquire_fini(ctx);
-}
-
-Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
-e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
-and edges can only be changed when holding the locks of all involved nodes. w/w
-mutexes are a natural fit for such a case for two reasons:
-- They can handle lock-acquisition in any order which allows us to start walking
-  a graph from a starting point and then iteratively discovering new edges and
-  locking down the nodes those edges connect to.
-- Due to the -EALREADY return code signalling that a given object is already
-  held there's no need for additional book-keeping to break cycles in the graph
-  or keep track of which locks are already held (when using more than one node
-  as a starting point).
-
-Note that this approach differs in two important ways from the above methods:
-- Since the list of objects is dynamically constructed (and might very well be
-  different when retrying due to hitting the -EDEADLK wound condition) there's
-  no need to keep any object on a persistent list when it's not locked. We can
-  therefore move the list_head into the object itself.
-
-- On the other hand the dynamic object list construction also means that the
-  -EALREADY return code can't be propagated.
-
-Note also that methods #1 and #2 can be combined with method #3, e.g. to first
-lock a list of starting nodes (passed in from userspace) using one of the above
-methods, and then lock any additional objects affected by the operations using
-method #3. The backoff/retry procedure will be a bit more involved, since
-when the dynamic locking step hits -EDEADLK we also need to unlock all the
-objects acquired with the fixed list. But the w/w mutex debug checks will catch
-any interface misuse for these cases.
-
-Also, method 3 can't fail the lock acquisition step since it doesn't return
--EALREADY. Of course this would be different when using the _interruptible
-variants, but that's outside of the scope of these examples here.
-
-struct obj {
-	struct ww_mutex ww_mutex;
-	struct list_head locked_list;
-};
-
-static DEFINE_WW_CLASS(ww_class);
-
-void __unlock_objs(struct list_head *list)
-{
-	struct obj *entry, *temp;
-
-	list_for_each_entry_safe (entry, temp, list, locked_list) {
-		/* need to do that before unlocking, since only the current
-		 * lock holder is allowed to use the object */
-		list_del(&entry->locked_list);
-		ww_mutex_unlock(&entry->ww_mutex);
-	}
-}
-
-void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *obj;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	/* re-init loop start state */
-	loop {
-		/* magic code which walks over a graph and decides which objects
-		 * to lock */
-
-		ret = ww_mutex_lock(&obj->ww_mutex, ctx);
-		if (ret == -EALREADY) {
-			/* we have that one already, get to the next object */
-			continue;
-		}
-		if (ret == -EDEADLK) {
-			__unlock_objs(list);
-
-			ww_mutex_lock_slow(&obj->ww_mutex, ctx);
-			list_add(&obj->locked_list, list);
-			goto retry;
-		}
-
-		/* locked a new object, add it to the list */
-		list_add_tail(&obj->locked_list, list);
-	}
-
-	ww_acquire_done(ctx);
-}
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	__unlock_objs(list);
-	ww_acquire_fini(ctx);
-}
-
-Method 4: Only lock one single object. In that case deadlock detection and
-prevention is obviously overkill, since with grabbing just one lock you can't
-produce a deadlock within just one class. To simplify this case the w/w mutex
-api can be used with a NULL context.
-
-Implementation Details
-----------------------
-
-Design:
-  ww_mutex currently encapsulates a struct mutex; this means no extra overhead
-  for normal mutex locks, which are far more common. As such there is only a
-  small increase in code size if wait/wound mutexes are not used.
-
-  In general, not much contention is expected. The locks are typically used to
-  serialize access to resources for devices. The only way to make wakeups
-  smarter would be at the cost of adding a field to struct mutex_waiter. This
-  would add overhead to all cases where normal mutexes are used, and
-  ww_mutexes are generally less performance sensitive.
-
-Lockdep:
-  Special care has been taken to warn for as many cases of api abuse
-  as possible. Some common api abuses will be caught with
-  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
-
-  Some of the errors which will be warned about:
-   - Forgetting to call ww_acquire_fini or ww_acquire_init.
-   - Attempting to lock more mutexes after ww_acquire_done.
-   - Attempting to lock the wrong mutex after -EDEADLK and
-     unlocking all mutexes.
- - Attempting to lock the right mutex after -EDEADLK, - before unlocking all mutexes. - - - Calling ww_mutex_lock_slow before -EDEADLK was returned. - - - Unlocking mutexes with the wrong unlock function. - - Calling one of the ww_acquire_* twice on the same context. - - Using a different ww_class for the mutex than for the ww_acquire_ctx. - - Normal lockdep errors that can result in deadlocks. - - Some of the lockdep errors that can result in deadlocks: - - Calling ww_acquire_init to initialize a second ww_acquire_ctx before - having called ww_acquire_fini on the first. - - 'normal' deadlocks that can occur. - -FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic -implemented. diff --git a/MAINTAINERS b/MAINTAINERS index 1acc624ecfd7..aac481fcbf5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5523,8 +5523,8 @@ M: Ingo Molnar L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking S: Maintained -F: Documentation/lockdep*.txt -F: Documentation/lockstat.txt +F: Documentation/locking/lockdep*.txt +F: Documentation/locking/lockstat.txt F: include/linux/lockdep.h F: kernel/locking/ diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 0dc57d5ecd10..3a02e5e3e9f3 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -35,7 +35,7 @@ * of extra utility/tracking out of our acquire-ctx. This is provided * by drm_modeset_lock / drm_modeset_acquire_ctx. * - * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt + * For basic principles of ww_mutex, see: Documentation/locking/ww-mutex-design.txt * * The basic usage pattern is to: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 008388f920d7..f388481201cd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -4,7 +4,7 @@ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * - * see Documentation/lockdep-design.txt for more details. + * see Documentation/locking/lockdep-design.txt for more details. */ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H diff --git a/include/linux/mutex.h b/include/linux/mutex.h index e4c29418f407..cc31498fc526 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -133,7 +133,7 @@ static inline int mutex_is_locked(struct mutex *lock) /* * See kernel/locking/mutex.c for detailed documentation of these APIs. - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 035d3c57fc8a..8f498cdde280 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -149,7 +149,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. - * See Documentation/lockdep-design.txt for more details.) + * See Documentation/locking/lockdep-design.txt for more details.) 
*/ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 0d8b6ed93874..dadbf88c22c4 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -15,7 +15,7 @@ * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale * and Sven Dietrich. * - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a0ea2a141b3b..7c98873a3077 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -8,7 +8,7 @@ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt * Copyright (C) 2006 Esben Nielsen * - * See Documentation/rt-mutex-design.txt for details. + * See Documentation/locking/rt-mutex-design.txt for details. */ #include #include diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 901096d31c66..9b94a063e26c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -924,7 +924,7 @@ config PROVE_LOCKING the proof of observed correctness is also maintained for an arbitrary combination of these separate locking variants. - For more details, see Documentation/lockdep-design.txt. + For more details, see Documentation/locking/lockdep-design.txt. config LOCKDEP bool @@ -945,7 +945,7 @@ config LOCK_STAT help This feature enables tracking lock contention points - For more details, see Documentation/lockstat.txt + For more details, see Documentation/locking/lockstat.txt This also enables lock events required by "perf lock", subcommand of perf. -- cgit v1.2.3 From 4999201a59ef555f9105d2bb2459ed895627f7aa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 8 Aug 2014 12:35:36 +0200 Subject: locking/spinlocks: Always evaluate the second argument of spin_lock_nested() Evaluating a macro argument only if certain configuration options have been selected is confusing and error-prone. Hence always evaluate the second argument of spin_lock_nested(). An intentional side effect of this patch is that it avoids that the following warning is reported for netif_addr_lock_nested() when building with CONFIG_DEBUG_LOCK_ALLOC=n and with W=1: include/linux/netdevice.h: In function 'netif_addr_lock_nested': include/linux/netdevice.h:2865:6: warning: variable 'subclass' set but not used [-Wunused-but-set-variable] int subclass = SINGLE_DEPTH_NESTING; ^ Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra Cc: David Rientjes Cc: David S. Miller Cc: Andrew Morton Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/53E4A7F8.1040700@acm.org Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3f2867ff0ced..262ba4ef9a8e 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -197,7 +197,13 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) #else -# define raw_spin_lock_nested(lock, subclass) _raw_spin_lock(lock) +/* + * Always evaluate the 'subclass' argument to avoid that the compiler + * warns about set-but-not-used variables when building with + * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. 
+ */ +# define raw_spin_lock_nested(lock, subclass) \ + _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) #endif -- cgit v1.2.3 From f0bab73cb539fb803c4d419951e8d28aa4964f8f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2014 13:22:01 -0400 Subject: locking/lockdep: Restrict the use of recursive read_lock() with qrwlock Unlike the original unfair rwlock implementation, queued rwlock will grant lock according to the chronological sequence of the lock requests except when the lock requester is in the interrupt context. Consequently, recursive read_lock calls will now hang the process if there is a write_lock call somewhere in between the read_lock calls. This patch updates the lockdep implementation to look for recursive read_lock calls. A new read state (3) is used to mark those read_lock call that cannot be recursively called except in the interrupt context. The new read state does exhaust the 2 bits available in held_lock:read bit field. The addition of any new read state in the future may require a redesign of how all those bits are squeezed together in the held_lock structure. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra Cc: Maarten Lankhorst Cc: Rik van Riel Cc: Scott J Norton Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1407345722-61615-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 10 +++++++++- kernel/locking/lockdep.c | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f388481201cd..b5a84b62fb84 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -478,16 +478,24 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ +/* + * Read states in the 2-bit held_lock:read field: + * 0: Exclusive lock + * 1: Shareable lock, cannot be recursively called + * 2: Shareable lock, can be recursively called + * 3: Shareable lock, cannot be recursively called except in interrupt context + */ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) +#define lock_acquire_shared_irecursive(l, s, t, n, i) lock_acquire(l, s, t, 3, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define spin_release(l, n, i) lock_release(l, n, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_irecursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..420ba685c4e5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3597,6 +3597,12 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); + /* + * An interrupt recursive read in interrupt context can be considered + * to be the same as a recursive read from checking perspective. 
+ */ + if ((read == 3) && in_interrupt()) + read = 2; current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, -- cgit v1.2.3 From 515d9b2c03943ca904cd135e1b1d9ddd168c1b27 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 12 Aug 2014 18:22:27 +0200 Subject: ata: remove deprecated struct ahci_platform_data The last user of the deprecated struct ahci_platform_data has been cleaned up recently (SPEAr1340 got a proper PHY driver). Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/ahci_platform.c | 18 +----------------- drivers/ata/libahci_platform.c | 23 ----------------------- include/linux/ahci_platform.h | 13 ------------- 3 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index f61ddb9146d6..06f1d59fa678 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -32,7 +32,6 @@ static const struct ata_port_info ahci_port_info = { static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv; int rc; @@ -44,29 +43,14 @@ static int ahci_probe(struct platform_device *pdev) if (rc) return rc; - /* - * Some platforms might need to prepare for mmio region access, - * which could be done in the following init call. So, the mmio - * region shouldn't be accessed before init (if provided) has - * returned successfully. - */ - if (pdata && pdata->init) { - rc = pdata->init(dev, hpriv->mmio); - if (rc) - goto disable_resources; - } - if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info); if (rc) - goto pdata_exit; + goto disable_resources; return 0; -pdata_exit: - if (pdata && pdata->exit) - pdata->exit(dev); disable_resources: ahci_platform_disable_resources(hpriv); return rc; diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 5b92c290e6c6..c0510de8a4c9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -502,13 +502,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_init_host); static void ahci_host_stop(struct ata_host *host) { - struct device *dev = host->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv = host->private_data; - if (pdata && pdata->exit) - pdata->exit(dev); - ahci_platform_disable_resources(hpriv); } @@ -592,7 +587,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host); */ int ahci_platform_suspend(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; int rc; @@ -601,19 +595,9 @@ int ahci_platform_suspend(struct device *dev) if (rc) return rc; - if (pdata && pdata->suspend) { - rc = pdata->suspend(dev); - if (rc) - goto resume_host; - } - ahci_platform_disable_resources(hpriv); return 0; - -resume_host: - ahci_platform_resume_host(dev); - return rc; } EXPORT_SYMBOL_GPL(ahci_platform_suspend); @@ -629,7 +613,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend); */ int ahci_platform_resume(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = 
host->private_data; int rc; @@ -638,12 +621,6 @@ int ahci_platform_resume(struct device *dev) if (rc) return rc; - if (pdata && pdata->resume) { - rc = pdata->resume(dev); - if (rc) - goto disable_resources; - } - rc = ahci_platform_resume_host(dev); if (rc) goto disable_resources; diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 09a947e8bc87..642d6ae4030c 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -22,19 +22,6 @@ struct ata_port_info; struct ahci_host_priv; struct platform_device; -/* - * Note ahci_platform_data is deprecated, it is only kept around for use - * by the old da850 and spear13xx ahci code. - * New drivers should instead declare their own platform_driver struct, and - * use ahci_platform* functions in their own probe, suspend and resume methods. - */ -struct ahci_platform_data { - int (*init)(struct device *dev, void __iomem *addr); - void (*exit)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); -}; - int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); -- cgit v1.2.3 From 005547e0828ce9064afebb1e6d56a18efd80e7a3 Mon Sep 17 00:00:00 2001 From: James Ban Date: Fri, 8 Aug 2014 14:27:04 +0900 Subject: regulator: da9211: support DA9213 This is a patch for supporting DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 ++-- drivers/regulator/da9211-regulator.c | 95 ++++++++++++++++++++++++++++-------- drivers/regulator/da9211-regulator.h | 7 ++- include/linux/regulator/da9211.h | 7 ++- 4 files changed, 91 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 2dc8289e5dba..1344aa83b438 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -199,13 +199,14 @@ config REGULATOR_DA9210 interface. config REGULATOR_DA9211 - tristate "Dialog Semiconductor DA9211/DA9212 regulator" + tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214 regulator" depends on I2C select REGMAP_I2C help - Say y here to support for the Dialog Semiconductor DA9211/DA9212. - The DA9211/DA9212 is a multi-phase synchronous step down - converter 12A DC-DC Buck controlled through an I2C + Say y here to support for the Dialog Semiconductor DA9211/DA9212 + /DA9213/DA9214. + The DA9211/DA9212/DA9213/DA9214 is a multi-phase synchronous + step down converter 12A or 16A DC-DC Buck controlled through an I2C interface. config REGULATOR_DBX500_PRCMU diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index 1482adafa1ad..ccc2e362d751 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -1,5 +1,5 @@ /* - * da9211-regulator.c - Regulator device driver for DA9211 + * da9211-regulator.c - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. 
* * This library is free software; you can redistribute it and/or @@ -27,6 +27,10 @@ #include #include "da9211-regulator.h" +/* DEVICE IDs */ +#define DA9211_DEVICE_ID 0x22 +#define DA9213_DEVICE_ID 0x23 + #define DA9211_BUCK_MODE_SLEEP 1 #define DA9211_BUCK_MODE_SYNC 2 #define DA9211_BUCK_MODE_AUTO 3 @@ -42,6 +46,7 @@ struct da9211 { struct regulator_dev *rdev[DA9211_MAX_REGULATORS]; int num_regulator; int chip_irq; + int chip_id; }; static const struct regmap_range_cfg da9211_regmap_range[] = { @@ -52,14 +57,14 @@ static const struct regmap_range_cfg da9211_regmap_range[] = { .window_start = 0, .window_len = 256, .range_min = 0, - .range_max = 2*256, + .range_max = 5*128, }, }; static const struct regmap_config da9211_regmap_config = { .reg_bits = 8, .val_bits = 8, - .max_register = 2 * 256, + .max_register = 5 * 128, .ranges = da9211_regmap_range, .num_ranges = ARRAY_SIZE(da9211_regmap_range), }; @@ -69,11 +74,20 @@ static const struct regmap_config da9211_regmap_config = { #define DA9211_MAX_MV 1570 #define DA9211_STEP_MV 10 -/* Current limits for buck (uA) indices corresponds with register values */ +/* Current limits for DA9211 buck (uA) indices + * corresponds with register values + */ static const int da9211_current_limits[] = { 2000000, 2200000, 2400000, 2600000, 2800000, 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000 }; +/* Current limits for DA9213 buck (uA) indices + * corresponds with register values + */ +static const int da9213_current_limits[] = { + 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, + 4600000, 4800000, 5000000, 5200000, 5400000, 5600000, 5800000, 6000000 +}; static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev) { @@ -129,12 +143,26 @@ static int da9211_set_current_limit(struct regulator_dev *rdev, int min, { int id = rdev_get_id(rdev); struct da9211 *chip = rdev_get_drvdata(rdev); - int i; + int i, max_size; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + max_size = ARRAY_SIZE(da9211_current_limits)-1; + break; + case DA9213: + current_limits = da9213_current_limits; + max_size = ARRAY_SIZE(da9213_current_limits)-1; + break; + default: + return -EINVAL; + } /* search for closest to maximum */ - for (i = ARRAY_SIZE(da9211_current_limits)-1; i >= 0; i--) { - if (min <= da9211_current_limits[i] && - max >= da9211_current_limits[i]) { + for (i = max_size; i >= 0; i--) { + if (min <= current_limits[i] && + max >= current_limits[i]) { return regmap_update_bits(chip->regmap, DA9211_REG_BUCK_ILIM, (0x0F << id*4), (i << id*4)); @@ -150,14 +178,28 @@ static int da9211_get_current_limit(struct regulator_dev *rdev) struct da9211 *chip = rdev_get_drvdata(rdev); unsigned int data; int ret; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + break; + case DA9213: + current_limits = da9213_current_limits; + break; + default: + return -EINVAL; + } ret = regmap_read(chip->regmap, DA9211_REG_BUCK_ILIM, &data); if (ret < 0) return ret; - /* select one of 16 values: 0000 (2000mA) to 1111 (5000mA) */ + /* select one of 16 values: 0000 (2000mA or 3000mA) + * to 1111 (5000mA or 6000mA). 
+ */ data = (data >> id*4) & 0x0F; - return da9211_current_limits[data]; + return current_limits[data]; } static struct regulator_ops da9211_buck_ops = { @@ -264,10 +306,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - if (chip->pdata) - config.init_data = - &(chip->pdata->init_data[i]); - + config.init_data = &(chip->pdata->init_data[i]); config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -301,6 +340,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, { struct da9211 *chip; int error, ret; + unsigned int data; chip = devm_kzalloc(&i2c->dev, sizeof(struct da9211), GFP_KERNEL); @@ -308,7 +348,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, chip->regmap = devm_regmap_init_i2c(i2c, &da9211_regmap_config); if (IS_ERR(chip->regmap)) { error = PTR_ERR(chip->regmap); - dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + dev_err(chip->dev, "Failed to allocate register map: %d\n", error); return error; } @@ -316,8 +356,22 @@ static int da9211_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, chip); chip->pdata = i2c->dev.platform_data; - if (!chip->pdata) { - dev_err(&i2c->dev, "No platform init data supplied\n"); + + ret = regmap_read(chip->regmap, DA9211_REG_DEVICE_ID, &data); + if (ret < 0) { + dev_err(chip->dev, "Failed to read DEVICE_ID reg: %d\n", ret); + return ret; + } + + switch (data) { + case DA9211_DEVICE_ID: + chip->chip_id = DA9211; + break; + case DA9213_DEVICE_ID: + chip->chip_id = DA9213; + break; + default: + dev_err(chip->dev, "Unsupported device id = 0x%x.\n", data); return -ENODEV; } @@ -340,13 +394,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c, ret = da9211_regulator_init(chip); if (ret < 0) - dev_err(&i2c->dev, "Failed to initialize regulator: %d\n", ret); + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); return ret; } static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", 0}, + {"da9211", DA9211}, + {"da9213", DA9213}, {}, }; @@ -364,5 +419,5 @@ static struct i2c_driver da9211_regulator_driver = { module_i2c_driver(da9211_regulator_driver); MODULE_AUTHOR("James Ban "); -MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211"); +MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h index 88b1769e8058..93fa9df2721c 100644 --- a/drivers/regulator/da9211-regulator.h +++ b/drivers/regulator/da9211-regulator.h @@ -1,5 +1,5 @@ /* - * da9211-regulator.h - Regulator definitions for DA9211 + * da9211-regulator.h - Regulator definitions for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. 
* * This library is free software; you can redistribute it and/or @@ -53,12 +53,15 @@ /* BUCK Phase Selection*/ #define DA9211_REG_CONFIG_E 0x147 +/* Device ID */ +#define DA9211_REG_DEVICE_ID 0x201 + /* * Registers bits */ /* DA9211_REG_PAGE_CON (addr=0x00) */ #define DA9211_REG_PAGE_SHIFT 1 -#define DA9211_REG_PAGE_MASK 0x02 +#define DA9211_REG_PAGE_MASK 0x06 /* On I2C registers 0x00 - 0xFF */ #define DA9211_REG_PAGE0 0 /* On I2C registers 0x100 - 0x1FF */ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 0981ce0e72cc..658c3c33f4c0 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -1,5 +1,5 @@ /* - * da9211.h - Regulator device driver for DA9211 + * da9211.h - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. * * This library is free software; you can redistribute it and/or @@ -20,6 +20,11 @@ #define DA9211_MAX_REGULATORS 2 +enum da9211_chip_id { + DA9211, + DA9213, +}; + struct da9211_pdata { /* * Number of buck -- cgit v1.2.3 From 0e4f417857083f399769491f6e7773d111debd0f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 15 Jul 2014 16:32:51 +0530 Subject: regulator: s2mpxxx: Move regulator min/step voltages in common place This is a cleanup patch and moves min/step voltages in a common samsung header file so that they can be used by other s2mpxxx PMIC drivers. Only few required macros are added currently and others can be added if needed. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Amit Daniel Kachhap Acked-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/s2mpa01.c | 32 ++++++++++++------------ drivers/regulator/s2mps11.c | 50 ++++++++++++++++++------------------- include/linux/mfd/samsung/core.h | 21 ++++++++++++++++ include/linux/mfd/samsung/s2mpa01.h | 12 --------- include/linux/mfd/samsung/s2mps11.h | 9 ------- include/linux/mfd/samsung/s2mps14.h | 10 -------- 6 files changed, 62 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c index ee83b4876420..962c5f192f7c 100644 --- a/drivers/regulator/s2mpa01.c +++ b/drivers/regulator/s2mpa01.c @@ -241,8 +241,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -255,8 +255,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -270,8 +270,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B1CTRL2 + (num - 1) * 2, \ @@ -286,8 +286,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = 
S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B5CTRL2, \ @@ -302,8 +302,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B6CTRL2 + (num - 6) * 2, \ @@ -318,8 +318,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B8CTRL2, \ @@ -334,8 +334,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN4, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1500_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B9CTRL2, \ @@ -350,8 +350,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN3, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1000_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B10CTRL2, \ diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index b16c53a8272f..3dede776a837 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -255,14 +255,14 @@ static struct regulator_ops s2mps11_buck_ops = { .set_ramp_delay = s2mps11_set_ramp_delay, }; -#define regulator_desc_s2mps11_ldo1(num) { \ +#define regulator_desc_s2mps11_ldo1(num) { \ .name = "LDO"#num, \ .id = S2MPS11_LDO##num, \ .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -275,8 +275,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -290,8 +290,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B1CTRL2 + (num - 1) * 2, \ @@ -306,8 +306,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV 
= MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B5CTRL2, \ @@ -322,8 +322,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B6CTRL2 + (num - 6) * 2, \ @@ -338,8 +338,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN3, \ - .uV_step = S2MPS11_BUCK_STEP3, \ + .min_uV = MIN_3000_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B9CTRL2, \ @@ -354,8 +354,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN2, \ - .uV_step = S2MPS11_BUCK_STEP2, \ + .min_uV = MIN_750_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B10CTRL2, \ @@ -516,8 +516,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -530,8 +530,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_1800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_1800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -544,8 +544,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_12_5MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -558,8 +558,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK1235_MIN_600MV, \ - .uV_step = S2MPS14_BUCK1235_STEP_6_25MV, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK1235_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ @@ -574,8 +574,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK4_MIN_1400MV, \ - .uV_step = S2MPS14_BUCK4_STEP_12_5MV, \ + .min_uV = MIN_1400_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK4_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index b5f73de81aad..1825edacbda7 100644 --- a/include/linux/mfd/samsung/core.h +++ 
b/include/linux/mfd/samsung/core.h @@ -14,6 +14,27 @@ #ifndef __LINUX_MFD_SEC_CORE_H #define __LINUX_MFD_SEC_CORE_H +/* Macros to represent minimum voltages for LDO/BUCK */ +#define MIN_3000_MV 3000000 +#define MIN_2500_MV 2500000 +#define MIN_2000_MV 2000000 +#define MIN_1800_MV 1800000 +#define MIN_1500_MV 1500000 +#define MIN_1400_MV 1400000 +#define MIN_1000_MV 1000000 + +#define MIN_900_MV 900000 +#define MIN_850_MV 850000 +#define MIN_800_MV 800000 +#define MIN_750_MV 750000 +#define MIN_600_MV 600000 + +/* Macros to represent steps for LDO/BUCK */ +#define STEP_50_MV 50000 +#define STEP_25_MV 25000 +#define STEP_12_5_MV 12500 +#define STEP_6_25_MV 6250 + enum sec_device_type { S5M8751X, S5M8763X, diff --git a/include/linux/mfd/samsung/s2mpa01.h b/include/linux/mfd/samsung/s2mpa01.h index fbc63bc0d6a2..2766108bca2f 100644 --- a/include/linux/mfd/samsung/s2mpa01.h +++ b/include/linux/mfd/samsung/s2mpa01.h @@ -155,18 +155,6 @@ enum s2mpa01_regulators { S2MPA01_REGULATOR_MAX, }; -#define S2MPA01_BUCK_MIN1 600000 -#define S2MPA01_BUCK_MIN2 800000 -#define S2MPA01_BUCK_MIN3 1000000 -#define S2MPA01_BUCK_MIN4 1500000 -#define S2MPA01_LDO_MIN 800000 - -#define S2MPA01_BUCK_STEP1 6250 -#define S2MPA01_BUCK_STEP2 12500 - -#define S2MPA01_LDO_STEP1 50000 -#define S2MPA01_LDO_STEP2 25000 - #define S2MPA01_LDO_VSEL_MASK 0x3F #define S2MPA01_BUCK_VSEL_MASK 0xFF #define S2MPA01_ENABLE_MASK (0x03 << S2MPA01_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b3ddf98dec37..7981a9d77d3f 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -171,15 +171,6 @@ enum s2mps11_regulators { S2MPS11_REGULATOR_MAX, }; -#define S2MPS11_BUCK_MIN1 600000 -#define S2MPS11_BUCK_MIN2 750000 -#define S2MPS11_BUCK_MIN3 3000000 -#define S2MPS11_LDO_MIN 800000 -#define S2MPS11_BUCK_STEP1 6250 -#define S2MPS11_BUCK_STEP2 12500 -#define S2MPS11_BUCK_STEP3 25000 -#define S2MPS11_LDO_STEP1 50000 -#define S2MPS11_LDO_STEP2 25000 #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h index 900cd7a04314..c92f4782afb5 100644 --- a/include/linux/mfd/samsung/s2mps14.h +++ b/include/linux/mfd/samsung/s2mps14.h @@ -123,10 +123,6 @@ enum s2mps14_regulators { }; /* Regulator constraints for BUCKx */ -#define S2MPS14_BUCK1235_MIN_600MV 600000 -#define S2MPS14_BUCK4_MIN_1400MV 1400000 -#define S2MPS14_BUCK1235_STEP_6_25MV 6250 -#define S2MPS14_BUCK4_STEP_12_5MV 12500 #define S2MPS14_BUCK1235_START_SEL 0x20 #define S2MPS14_BUCK4_START_SEL 0x40 /* @@ -136,12 +132,6 @@ enum s2mps14_regulators { */ #define S2MPS14_BUCK_RAMP_DELAY 12500 -/* Regulator constraints for different types of LDOx */ -#define S2MPS14_LDO_MIN_800MV 800000 -#define S2MPS14_LDO_MIN_1800MV 1800000 -#define S2MPS14_LDO_STEP_12_5MV 12500 -#define S2MPS14_LDO_STEP_25MV 25000 - #define S2MPS14_LDO_VSEL_MASK 0x3F #define S2MPS14_BUCK_VSEL_MASK 0xFF #define S2MPS14_ENABLE_MASK (0x03 << S2MPS14_ENABLE_SHIFT) -- cgit v1.2.3 From 272e2315fac3bfca0edfa3252b8a643c425602af Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:38 +0800 Subject: regulator: core: add const qualifier to ops in struct regulator_desc struct regulator_ops *ops is a member in struct regulator_desc, which gets its value from individual regulator driver upon regulator_register() and is used by regulator core APIs. 
It's not allowed for regulator core to modify any of these callbacks in *ops. Add 'const' qualifier to enforce that. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 24 ++++++++++++------------ include/linux/regulator/driver.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..052e7f1f011d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -839,7 +839,7 @@ static void print_constraints(struct regulator_dev *rdev) static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ @@ -938,7 +938,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) @@ -982,7 +982,7 @@ static int set_machine_constraints(struct regulator_dev *rdev, const struct regulation_constraints *constraints) { int ret = 0; - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; if (constraints) rdev->constraints = kmemdup(constraints, sizeof(*constraints), @@ -2208,9 +2208,9 @@ EXPORT_SYMBOL_GPL(regulator_count_voltages); */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; - int ret; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; + int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; @@ -2572,8 +2572,8 @@ EXPORT_SYMBOL_GPL(regulator_set_voltage); int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; @@ -3336,9 +3336,9 @@ EXPORT_SYMBOL_GPL(regulator_mode_to_status); */ static int add_regulator_attributes(struct regulator_dev *rdev) { - struct device *dev = &rdev->dev; - struct regulator_ops *ops = rdev->desc->ops; - int status = 0; + struct device *dev = &rdev->dev; + const struct regulator_ops *ops = rdev->desc->ops; + int status = 0; /* some attributes need specific methods to be displayed */ if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || @@ -3905,7 +3905,7 @@ core_initcall(regulator_init); static int __init regulator_init_complete(void) { struct regulator_dev *rdev; - struct regulator_ops *ops; + const struct regulator_ops *ops; struct regulation_constraints *c; int enabled, ret; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..4b628139a9cb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -245,7 +245,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 
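For illustration, a minimal sketch of the driver-side pattern this change enforces; the "foo" names and register layout are hypothetical, while the callbacks are the stock regulator helpers:

static const struct regulator_ops foo_buck_ops = {
	/* generic helpers for a linear voltage range */
	.list_voltage		= regulator_list_voltage_linear,
	.map_voltage		= regulator_map_voltage_linear,
	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
};

static const struct regulator_desc foo_buck_desc = {
	.name		= "foo-buck",
	.id		= 0,
	.ops		= &foo_buck_ops,	/* pointer to const ops table */
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.min_uV		= 600000,		/* hypothetical range */
	.uV_step	= 6250,
	.n_voltages	= 64,
	.vsel_reg	= 0x10,			/* hypothetical register */
	.vsel_mask	= 0x3f,
};

With the const qualifier in place, any write through rdev->desc->ops in the core fails to compile, and a driver's ops table such as the one above can be placed in .rodata.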
From 871f565055ed232e5751da18a331b73e8254adaf Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:40 +0800 Subject: regulator: core: add guard delay between calling regulator_disable and _enable Some regulators require a minimum delay between a disable and the next enable. This avoids the damage that overly frequent disable/enable cycling of a single regulator can cause to the regulator chip. Add @off_on_delay to struct regulator_desc. Device drivers can use this field to set this guard time. Add @last_off_jiffy to struct regulator_dev. When @off_on_delay is set by the driver, the regulator core can store its last off (disable) time into this field. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 31 +++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 6 ++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index dc0e9813b62d..1e976b6320a2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1813,6 +1813,31 @@ static int _regulator_do_enable(struct regulator_dev *rdev) trace_regulator_enable(rdev_get_name(rdev)); + if (rdev->desc->off_on_delay) { + /* if needed, keep a distance of off_on_delay from the last time + * this regulator was disabled. + */ + unsigned long start_jiffy = jiffies; + unsigned long intended, max_delay, remaining; + + max_delay = usecs_to_jiffies(rdev->desc->off_on_delay); + intended = rdev->last_off_jiffy + max_delay; + + if (time_before(start_jiffy, intended)) { + /* calc remaining jiffies to deal with one-time + * timer wrapping. + * in case of multiple timer wrapping, either it can be + * detected by out-of-range remaining, or it cannot be + * detected and we get a penalty of + * _regulator_enable_delay(). + */ + remaining = intended - start_jiffy; + if (remaining <= max_delay) + _regulator_enable_delay( + jiffies_to_usecs(remaining)); + } + } + if (rdev->ena_pin) { ret = regulator_ena_gpio_ctrl(rdev, true); if (ret < 0) @@ -1925,6 +1950,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev) return ret; } + /* care about last_off_jiffy only if off_on_delay is required by + * the device. + */ + if (rdev->desc->off_on_delay) + rdev->last_off_jiffy = jiffies; + trace_regulator_disable_complete(rdev_get_name(rdev)); return 0; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4b628139a9cb..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -238,6 +238,7 @@ enum regulator_type { * @bypass_val_off: Disabling value for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). + * @off_on_delay: guard time (in uS) before re-enabling a regulator */ struct regulator_desc { const char *name; @@ -276,6 +277,8 @@ struct regulator_desc { unsigned int bypass_val_off; unsigned int enable_time; + + unsigned int off_on_delay; }; /** @@ -348,6 +351,9 @@ struct regulator_dev { struct regulator_enable_gpio *ena_pin; unsigned int ena_gpio_state:1; + + /* time when this regulator was last disabled */ + unsigned long last_off_jiffy; }; struct regulator_dev * -- cgit v1.2.3 From 983c684466e02b21f83c025ea539deee6c0aeac0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2014 13:03:10 -0400 Subject: SUNRPC: get rid of the request wait queue We're always _only_ waking up tasks from within the sp_threads list, so we know that they are enqueued and alive.
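(A hedged usage sketch for the off_on_delay patch above — the descriptor below is hypothetical, not taken from any in-tree driver:

static const struct regulator_desc foo_ldo_desc = {
	.name		= "foo-ldo",
	.ops		= &foo_ldo_ops,		/* assumed defined elsewhere */
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.enable_time	= 500,			/* uS until the output is stable */
	.off_on_delay	= 5000,			/* uS the rail must stay off */
};

With .off_on_delay set, a regulator_enable() issued shortly after a regulator_disable() blocks in _regulator_do_enable() until at least 5 ms have passed since the disable was recorded in last_off_jiffy.)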
The rq_wait waitqueue is just a distraction with extra atomic semantics. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 - net/sunrpc/svc.c | 2 -- net/sunrpc/svc_xprt.c | 32 +++++++++++++++++--------------- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cf61ecd148e0..21678464883a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -280,7 +280,6 @@ struct svc_rqst { bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ - wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1db5007ddbce..ca8a7958f4e6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) goto out_enomem; - init_waitqueue_head(&rqstp->rq_wait); - serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 08e49d1e17b3..faaf2b46273b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -348,8 +348,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - put_cpu(); - spin_lock_bh(&pool->sp_lock); if (!list_empty(&pool->sp_threads) && @@ -382,10 +380,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) printk(KERN_ERR "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", rqstp, rqstp->rq_xprt); - rqstp->rq_xprt = xprt; + /* Note the order of the following 3 lines: + * We want to assign xprt to rqstp->rq_xprt only _after_ + * we've woken up the process, so that we don't race with + * the lockless check in svc_get_next_xprt(). + */ svc_xprt_get(xprt); + wake_up_process(rqstp->rq_task); + rqstp->rq_xprt = xprt; pool->sp_stats.threads_woken++; - wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); @@ -394,6 +397,7 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: spin_unlock_bh(&pool->sp_lock); + put_cpu(); } /* @@ -509,7 +513,7 @@ void svc_wake_up(struct svc_serv *serv) svc_thread_dequeue(pool, rqstp); rqstp->rq_xprt = NULL; */ - wake_up(&rqstp->rq_wait); + wake_up_process(rqstp->rq_task); } else pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); @@ -628,7 +632,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; - DECLARE_WAITQUEUE(wait, current); long time_left; /* Normally we will wait up to 5 seconds for any required @@ -654,15 +657,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) xprt = ERR_PTR(-EAGAIN); goto out; } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); - /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + /* No data pending. 
Go to sleep */ + svc_thread_enqueue(pool, rqstp); + /* * checking kthread_should_stop() here allows us to avoid * locking and signalling when stopping kthreads that call @@ -676,14 +679,13 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) goto out; } - add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); time_left = schedule_timeout(timeout); + __set_current_state(TASK_RUNNING); try_to_freeze(); - remove_wait_queue(&rqstp->rq_wait, &wait); xprt = rqstp->rq_xprt; if (xprt != NULL) return xprt; @@ -786,10 +788,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) printk(KERN_ERR "svc_recv: service %p, transport not NULL!\n", rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); + + /* Make sure the task pointer is set! */ + if (WARN_ON_ONCE(!rqstp->rq_task)) + rqstp->rq_task = current_task; err = svc_alloc_arg(rqstp); if (err) -- cgit v1.2.3 From 716845ebeb505353d900320b4a74e8330520410d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Aug 2014 10:34:08 +0800 Subject: regulator: core: Fix build error due to const qualifier for ops Drop const qualifier for ops of struct regulator_desc. Allow regulator drivers to update ops before registering regulator. Fix below build error: CC [M] drivers/regulator/mc13892-regulator.o drivers/regulator/mc13892-regulator.c: In function 'mc13892_regulator_probe': drivers/regulator/mc13892-regulator.c:586:3: error: assignment of member 'set_mode' in read-only object drivers/regulator/mc13892-regulator.c:588:3: error: assignment of member 'get_mode' in read-only object make[2]: *** [drivers/regulator/mc13892-regulator.o] Error 1 make[1]: *** [drivers/regulator] Error 2 make: *** [drivers] Error 2 Reported-by: Stephen Rothwell Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index efe058f8f746..3abda7554d82 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - const struct regulator_ops *ops; + struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e5f81539f657af7e9f54ea37986fde8f92acef22 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:58 -0700 Subject: irqchip: gic: Replace hex numbers with defines. This is to cleanup some hex numbers used in the code and replace them with defines to make the code cleaner. Signed-off-by: Feng Kan Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-2-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic-common.c | 15 +++++++++------ drivers/irqchip/irq-gic.c | 25 +++++++++++++------------ include/linux/irqchip/arm-gic.h | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 60ac704d2090..61541ff24397 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -74,20 +74,22 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs, * Set all global interrupts to be level triggered, active low. 
*/ for (i = 32; i < gic_irqs; i += 16) - writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4); + writel_relaxed(GICD_INT_ACTLOW_LVLTRIG, + base + GIC_DIST_CONFIG + i / 4); /* * Set priority on all global interrupts. */ for (i = 32; i < gic_irqs; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i); + writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i); /* * Disable all interrupts. Leave the PPI and SGIs alone * as they are enabled by redistributor registers. */ for (i = 32; i < gic_irqs; i += 32) - writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8); + writel_relaxed(GICD_INT_EN_CLR_X32, + base + GIC_DIST_ENABLE_CLEAR + i / 8); if (sync_access) sync_access(); @@ -101,14 +103,15 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. */ - writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR); - writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET); + writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET); /* * Set priority on PPI and SGI interrupts */ for (i = 0; i < 32; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + base + GIC_DIST_PRI + i * 4 / 4); if (sync_access) sync_access(); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4b959e606fe8..35847453cecb 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -298,8 +298,8 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK); raw_spin_unlock(&irq_controller_lock); - gic_irq = (status & 0x3ff); - if (gic_irq == 1023) + gic_irq = (status & GICC_IAR_INT_ID_MASK); + if (gic_irq == GICC_INT_SPURIOUS) goto out; cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); @@ -360,7 +360,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) unsigned int gic_irqs = gic->gic_irqs; void __iomem *base = gic_data_dist_base(gic); - writel_relaxed(0, base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, base + GIC_DIST_CTRL); /* * Set all global interrupts to this CPU only. 
@@ -373,7 +373,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) gic_dist_config(base, gic_irqs, NULL); - writel_relaxed(1, base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL); } static void gic_cpu_init(struct gic_chip_data *gic) @@ -400,8 +400,8 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); - writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); - writel_relaxed(1, base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); } void gic_cpu_if_down(void) @@ -467,14 +467,14 @@ static void gic_dist_restore(unsigned int gic_nr) if (!dist_base) return; - writel_relaxed(0, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, dist_base + GIC_DIST_CTRL); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++) writel_relaxed(gic_data[gic_nr].saved_spi_conf[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) - writel_relaxed(0xa0a0a0a0, + writel_relaxed(GICD_INT_DEF_PRI_X4, dist_base + GIC_DIST_PRI + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) @@ -485,7 +485,7 @@ static void gic_dist_restore(unsigned int gic_nr) writel_relaxed(gic_data[gic_nr].saved_spi_enable[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); - writel_relaxed(1, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } static void gic_cpu_save(unsigned int gic_nr) @@ -539,10 +539,11 @@ static void gic_cpu_restore(unsigned int gic_nr) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(32, 4); i++) - writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + dist_base + GIC_DIST_PRI + i * 4); - writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(1, cpu_base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 45e2d8c15bd2..5cb9d41af5be 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -21,7 +21,10 @@ #define GIC_CPU_ACTIVEPRIO 0xd0 #define GIC_CPU_IDENT 0xfc +#define GICC_ENABLE 0x1 +#define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff +#define GICC_INT_SPURIOUS 1023 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -39,6 +42,18 @@ #define GIC_DIST_SGI_PENDING_CLEAR 0xf10 #define GIC_DIST_SGI_PENDING_SET 0xf20 +#define GICD_ENABLE 0x1 +#define GICD_DISABLE 0x0 +#define GICD_INT_ACTLOW_LVLTRIG 0x0 +#define GICD_INT_EN_CLR_X32 0xffffffff +#define GICD_INT_EN_SET_SGI 0x0000ffff +#define GICD_INT_EN_CLR_PPI 0xffff0000 +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + #define GICH_HCR 0x0 #define GICH_VTR 0x4 #define GICH_VMCR 0x8 -- cgit v1.2.3 From 3228950621d92f0f212378f95a6998ef3a1be0bb Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:59 -0700 Subject: irqchip: gic: Preserve gic V2 bypass bits in cpu ctrl register This change is made to preserve the GIC v2 bypass bits in the GIC_CPU_CTRL register (also known as the GICC_CTLR register in spec). This code will preserve all bits configured by the bootloader regarding v2 bypass group bits. 
In the X-Gene platform, the bypass functionality is not used and bypass bits should not be changed by the kernel gic code as it could lead to incorrect behavior. Signed-off-by: Feng Kan Reviewed-by: Vinayak Kale Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-3-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 25 ++++++++++++++++++++++--- include/linux/irqchip/arm-gic.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 35847453cecb..2500f6ba29e1 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -353,6 +353,21 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic) return mask; } +static void gic_cpu_if_up(void) +{ + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + u32 bypass = 0; + + /* + * Preserve bypass disable bits to be written back later + */ + bypass = readl(cpu_base + GIC_CPU_CTRL); + bypass &= GICC_DIS_BYPASS_MASK; + + writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); +} + + static void __init gic_dist_init(struct gic_chip_data *gic) { unsigned int i; @@ -401,13 +416,17 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); + gic_cpu_if_up(); } void gic_cpu_if_down(void) { void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); - writel_relaxed(0, cpu_base + GIC_CPU_CTRL); + u32 val = 0; + + val = readl(cpu_base + GIC_CPU_CTRL); + val &= ~GICC_ENABLE; + writel_relaxed(val, cpu_base + GIC_CPU_CTRL); } #ifdef CONFIG_CPU_PM @@ -543,7 +562,7 @@ static void gic_cpu_restore(unsigned int gic_nr) dist_base + GIC_DIST_PRI + i * 4); writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); + gic_cpu_if_up(); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 5cb9d41af5be..13eed92c7d24 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -25,6 +25,7 @@ #define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 +#define GICC_DIS_BYPASS_MASK 0x1e0 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 -- cgit v1.2.3 From 7b7d8982f0169d5ac67c6c2877449fb7f6968cac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 14:31:53 -0700 Subject: mtd: fix linux/mtd/nand.h kernel-doc warning Fix kernel-doc warning in : Warning(..//include/linux/mtd/nand.h:795): No description found for parameter 'ecc' Signed-off-by: Randy Dunlap Cc: David Woodhouse Cc: Brian Norris Cc: linux-mtd@lists.infradead.org Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3083c53e0270..b7c11991cb09 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -766,6 +766,7 @@ struct nand_chip { * @options: stores various chip bit options * @id_len: The valid length of the @id. * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. * @ecc.strength_ds: The ECC correctability from the datasheet, same as the * @ecc_strength_ds in nand_chip{}. 
* @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the -- cgit v1.2.3 From 31f754628cbb12c983600f22d9f0fed50dfe2134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:22 -0700 Subject: mtd: use __packed shorthand Signed-off-by: Brian Norris --- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/sm_common.h | 2 +- include/linux/mtd/cfi.h | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 48cf6f98df44..fc8b3d16cce7 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -145,7 +145,7 @@ struct mtdswap_dev { struct mtdswap_oobdata { __le16 magic; __le32 count; -} __attribute__((packed)); +} __packed; #define MTDSWAP_MAGIC_CLEAN 0x2095 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index 00f4a83359b2..d3e028e58b0f 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -18,7 +18,7 @@ struct sm_oob { uint8_t ecc2[3]; uint8_t lba_copy2[2]; uint8_t ecc1[3]; -} __attribute__((packed)); +} __packed; /* one sector is always 512 bytes, but it can consist of two nand pages */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 37ef6b194089..299d7d31fe53 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -153,7 +153,7 @@ struct cfi_ident { uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; uint32_t EraseRegionInfo[0]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; /* Extended Query Structure for both PRI and ALT */ @@ -161,7 +161,7 @@ struct cfi_extquery { uint8_t pri[3]; uint8_t MajorVersion; uint8_t MinorVersion; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ @@ -180,7 +180,7 @@ struct cfi_pri_intelext { uint8_t FactProtRegSize; uint8_t UserProtRegSize; uint8_t extra[0]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_otpinfo { uint32_t ProtRegAddr; @@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo { uint8_t FactProtRegSize; uint16_t UserGroups; uint8_t UserProtRegSize; -} __attribute__((packed)); +} __packed; struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; @@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo { uint16_t MinBlockEraseCycles; uint8_t BitsPerCell; uint8_t BlockCap; -} __attribute__((packed)); +} __packed; struct cfi_intelext_regioninfo { uint16_t NumIdentPartitions; @@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo { uint8_t NumOpAllowedSimEraMode; uint8_t NumBlockTypes; struct cfi_intelext_blockinfo BlockTypes[1]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_programming_regioninfo { uint8_t ProgRegShift; @@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo { uint8_t Reserved2; uint8_t ControlInvalid; uint8_t Reserved3; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ @@ -233,7 +233,7 @@ struct cfi_pri_amdstd { uint8_t VppMin; uint8_t VppMax; uint8_t TopBottom; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ @@ -245,18 +245,18 @@ struct cfi_pri_atmel { uint8_t BottomBoot; uint8_t BurstMode; uint8_t PageMode; -} __attribute__((packed)); +} __packed; struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; struct cfi_bri_query { uint8_t PageModeReadCap; uint8_t NumFields; 
uint32_t ConfField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; #define P_ID_NONE 0x0000 #define P_ID_INTEL_EXT 0x0001 -- cgit v1.2.3 From df11e506d330d9a0e5a701cd2c5fcb7d461b6060 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 21 Aug 2014 10:11:34 +0800 Subject: regulator: core: Add back the const qualifier for ops of struct regulator_desc Fix below build warning: CC [M] drivers/regulator/hi6421-regulator.o drivers/regulator/hi6421-regulator.c:356:2: warning: initialization discards 'const' qualifier from pointer target type [enabled by default] This is a revert of commit 716845ebeb50 ("regulator: core: Fix build error due to const qualifier for ops"). The build error was fixed by commit 39f5460d7f9c ("regulator: core: add const to regulator_ops and fix build error in mc13892"). Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 3abda7554d82..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. 
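(For illustration only — this patch itself adds empty stubs. An architecture wanting per-reschedule work could later fill in its stub along these lines; the per-vcpu field is hypothetical:

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
	/* called from the preempt notifier, before kvm_arch_vcpu_load() */
	vcpu->arch.sched_in_count++;	/* hypothetical per-vcpu state */
}

Because kvm_sched_in() invokes this hook first, such state is guaranteed to be updated before the vcpu is loaded on the new CPU.)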
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 2 ++ 7 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..9f788ebac55b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..2362df2a79f9 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..cbc432f4f0a6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ce81eb2ab76a..a3c324ec4370 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd718c01cdf1..7d43dc7bb906 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..ebd723676633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -624,6 +624,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 39b16035386f..5a0817ee996e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3124,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } -- cgit v1.2.3 From 8913dc0bb913ac3dc83ed5c10bac2f4e55431981 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Thu, 21 Aug 2014 20:28:20 +0000 Subject: usb: gadget: document a usb_ep_dequeue() requirement Document the requirement that the request be dequeued and its completion routine called before usb_ep_dequeue() returns. 
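(A hedged caller-side sketch of what this guarantee permits; the helper is hypothetical, and real drivers often free the request from the completion handler instead:

static void foo_cancel_io(struct usb_ep *ep, struct usb_request *req)
{
	/* On success the completion routine has already run with
	 * status -ECONNRESET, so no completion can arrive later. */
	if (usb_ep_dequeue(ep, req) == 0)
		usb_ep_free_request(ep, req);	/* safe: request is idle */
}
)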
Also fix some capitalization issues in the existing text. Signed-off-by: Paul Zimmerman Acked-by: Alan Stern Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c3a61853cd13..c540557b564b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -345,12 +345,13 @@ static inline int usb_ep_queue(struct usb_ep *ep, * @ep:the endpoint associated with the request * @req:the request being canceled * - * if the request is still active on the endpoint, it is dequeued and its + * If the request is still active on the endpoint, it is dequeued and its * completion routine is called (with status -ECONNRESET); else a negative - * error code is returned. + * error code is returned. This is guaranteed to happen before the call to + * usb_ep_dequeue() returns. * - * note that some hardware can't clear out write fifos (to unlink the request - * at the head of the queue) except as part of disconnecting from usb. such + * Note that some hardware can't clear out write fifos (to unlink the request + * at the head of the queue) except as part of disconnecting from usb. Such * restrictions prevent drivers from supporting configuration changes, * even to configuration zero (a "chapter 9" requirement). */ -- cgit v1.2.3 From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Aug 2014 16:02:12 +0200 Subject: bonding: create netlink event when bonding option is changed Userspace needs to be notified if one changes some option. Signed-off-by: Jiri Pirko Acked-by: Veaceslav Falico Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 ++ include/linux/netdevice.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dc73463c2c23..d8dc17faa6b4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond, out: if (ret) bond_opt_error_interpret(bond, opt, ret, val); + else + call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38377392d082..7e2b0b8b5cd7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ +#define NETDEV_CHANGEINFODATA 0x0018 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 22 Aug 2014 14:15:22 -0700 Subject: tcp: improve undo on timeout Upon timeout, undo (via both timestamps/Eifel and DSACKs) was disabled if any retransmits were still in flight. The concern was perhaps that spurious retransmission sent in a previous recovery episode may trigger DSACKs to falsely undo the current recovery. However, this inadvertently misses undo opportunities (using either TCP timestamps or DSACKs) when timeout occurs during a loss episode, i.e. recurring timeouts or timeout during fast recovery. 
In these cases some retransmissions will be in flight but we should allow undo. Furthermore, we should only reset undo_marker and undo_retrans upon timeout if we are starting a new recovery episode. Finally, when we do reset our undo state, we now do so in a manner similar to tcp_enter_recovery(), so that we require a DSACK for each of the outstanding retransmissions. This will achieve the original goal by requiring that we receive the same number of DSACKs as retransmissions. This patch increases the undo events by 50% on Google servers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/ipv4/tcp_input.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fa5258f322e7..e567f0dbf282 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -276,7 +276,7 @@ struct tcp_sock { u32 retrans_stamp; /* Timestamp of the last retransmit, * also used in SYN-SENT to remember stamp of * the first SYN. */ - u32 undo_marker; /* tracking retrans started here. */ + u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ u32 total_retrans; /* Total retransmits for entire connection */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a906e0200ff2..aba4926ca095 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static void tcp_clear_retrans_partial(struct tcp_sock *tp) +void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; - tp->undo_marker = 0; tp->undo_retrans = -1; + tp->fackets_out = 0; + tp->sacked_out = 0; } -void tcp_clear_retrans(struct tcp_sock *tp) +static inline void tcp_init_undo(struct tcp_sock *tp) { - tcp_clear_retrans_partial(tp); - - tp->fackets_out = 0; - tp->sacked_out = 0; + tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ + tp->undo_retrans = tp->retrans_out ? : -1; } /* Enter Loss state. If we detect SACK reneging, forget all SACK information @@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); + tcp_init_undo(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_clear_retrans_partial(tp); + tp->retrans_out = 0; + tp->lost_out = 0; if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { @@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out ?
: -1; + tcp_init_undo(tp); if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) -- cgit v1.2.3 From 53f3cc46336b9e514c98556b4a009a69ed808d3b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 23 Aug 2014 14:45:47 +0400 Subject: pata_platform: Remove useless irq_flags field IRQ flags can be obtained from the resource structure; there is no need for an additional field in the platform_data to store these values. This patch removes this field and converts existing users of this driver to use IRQ flags from the resources. Signed-off-by: Alexander Shiyan Signed-off-by: Tejun Heo --- arch/blackfin/mach-bf537/boards/cm_bf537e.c | 3 +-- arch/blackfin/mach-bf537/boards/cm_bf537u.c | 3 +-- arch/blackfin/mach-bf537/boards/stamp.c | 3 +-- arch/blackfin/mach-bf537/boards/tcm_bf537.c | 3 +-- arch/blackfin/mach-bf561/boards/cm_bf561.c | 3 +-- drivers/ata/pata_of_platform.c | 2 -- drivers/ata/pata_platform.c | 4 +--- include/linux/ata_platform.h | 5 ----- 8 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index 1e7290ef3525..1e1014df5e9e 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -733,7 +733,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -750,7 +749,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index c7495dc74690..d056db9e5592 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -587,7 +587,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -604,7 +603,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index de19b8a56007..88a19fc9844d 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -2462,7 +2462,6 @@ static struct platform_device bfin_sport0_device = { #define PATA_INT IRQ_PF5 static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 1, - .irq_flags = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -2479,7 +2478,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; #elif defined(CF_IDE_NAND_CARD_USE_CF_IN_COMMON_MEMORY_MODE) diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index 6b988ad653d8..ed309c9a62b6 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -589,7 +589,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type =
IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -606,7 +605,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index e862f7823e68..c6db52ba3a06 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -354,7 +354,6 @@ static struct platform_device bfin_sir0_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -371,7 +370,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 6af1c9b9a464..64965398914a 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -43,8 +43,6 @@ static int pata_of_platform_probe(struct platform_device *ofdev) } irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = 0; prop = of_get_property(dn, "reg-shift", NULL); if (prop) diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index a5579b55e332..f8cff3e247c5 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -118,7 +118,7 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, */ if (irq_res && irq_res->start > 0) { irq = irq_res->start; - irq_flags = irq_res->flags; + irq_flags = irq_res->flags & IRQF_TRIGGER_MASK; } /* @@ -213,8 +213,6 @@ static int pata_platform_probe(struct platform_device *pdev) * And the IRQ */ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = pp_info ? pp_info->irq_flags : 0; return __pata_platform_probe(&pdev->dev, io_res, ctl_res, irq_res, pp_info ? pp_info->ioport_shift : 0, diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index b9fde17f767c..5c618a084225 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -8,11 +8,6 @@ struct pata_platform_info { * spacing used by ata_std_ports(). */ unsigned int ioport_shift; - /* - * Indicate platform specific irq types and initial - * IRQ flags when call request_irq() - */ - unsigned int irq_flags; }; extern int __pata_platform_probe(struct device *dev, -- cgit v1.2.3 From 3af20efc0f83cdc65ce56ec108c0e81f602364df Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:39 -0700 Subject: net: phy: broadcom: extract all registers to brcmphy.h Commit 439d39a9ac8fbbba9c04581361188f33f21ced50 ("net: phy: broadcom: extract register definitions") added a bunch of registers to brcmphy.h but left some to broadcom.c, move all of them to the header file since the BCM54xx and BCM7xxx PHY drivers do share all of these registers. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/broadcom.c | 104 --------------------------------------------- include/linux/brcmphy.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 34088d60da74..6001d76d8676 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -25,110 +25,6 @@ #define BRCM_PHY_REV(phydev) \ ((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask)) -/* - * Broadcom LED source encodings. These are used in BCM5461, BCM5481, - * BCM5482, and possibly some others. - */ -#define BCM_LED_SRC_LINKSPD1 0x0 -#define BCM_LED_SRC_LINKSPD2 0x1 -#define BCM_LED_SRC_XMITLED 0x2 -#define BCM_LED_SRC_ACTIVITYLED 0x3 -#define BCM_LED_SRC_FDXLED 0x4 -#define BCM_LED_SRC_SLAVE 0x5 -#define BCM_LED_SRC_INTR 0x6 -#define BCM_LED_SRC_QUALITY 0x7 -#define BCM_LED_SRC_RCVLED 0x8 -#define BCM_LED_SRC_MULTICOLOR1 0xa -#define BCM_LED_SRC_OPENSHORT 0xb -#define BCM_LED_SRC_OFF 0xe /* Tied high */ -#define BCM_LED_SRC_ON 0xf /* Tied low */ - - -/* - * BCM5482: Shadow registers - * Shadow values go into bits [14:10] of register 0x1c to select a shadow - * register to access. - */ -/* 00101: Spare Control Register 3 */ -#define BCM54XX_SHD_SCR3 0x05 -#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 -#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 -#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 - -/* 01010: Auto Power-Down */ -#define BCM54XX_SHD_APD 0x0a -#define BCM54XX_SHD_APD_EN 0x0020 - -#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ - /* LED3 / ~LINKSPD[2] selector */ -#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) - /* LED1 / ~LINKSPD[1] selector */ -#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) -#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ -#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ -#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ -#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ -#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ -#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ - - -/* - * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) - */ -#define MII_BCM54XX_EXP_AADJ1CH0 0x001f -#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 -#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 -#define MII_BCM54XX_EXP_AADJ1CH3 0x601f -#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 -#define MII_BCM54XX_EXP_EXP08 0x0F08 -#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 -#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 -#define MII_BCM54XX_EXP_EXP75 0x0f75 -#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c -#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 -#define MII_BCM54XX_EXP_EXP96 0x0f96 -#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 -#define MII_BCM54XX_EXP_EXP97 0x0f97 -#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c - -/* - * BCM5482: Secondary SerDes registers - */ -#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ -#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ -#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ -#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ -#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ - - -/*****************************************************************************/ -/* Fast Ethernet Transceiver definitions. 
*/ -/*****************************************************************************/ - -#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ -#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ -#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ -#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ -#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ -#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ - -#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ -#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ - - -/*** Shadow register definitions ***/ - -#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ -#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ - -#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ -#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 -#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 - -#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ -#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ - - MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 61219b9b3445..be31bf9f60c2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -92,4 +92,107 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +/* + * Broadcom LED source encodings. These are used in BCM5461, BCM5481, + * BCM5482, and possibly some others. + */ +#define BCM_LED_SRC_LINKSPD1 0x0 +#define BCM_LED_SRC_LINKSPD2 0x1 +#define BCM_LED_SRC_XMITLED 0x2 +#define BCM_LED_SRC_ACTIVITYLED 0x3 +#define BCM_LED_SRC_FDXLED 0x4 +#define BCM_LED_SRC_SLAVE 0x5 +#define BCM_LED_SRC_INTR 0x6 +#define BCM_LED_SRC_QUALITY 0x7 +#define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_MULTICOLOR1 0xa +#define BCM_LED_SRC_OPENSHORT 0xb +#define BCM_LED_SRC_OFF 0xe /* Tied high */ +#define BCM_LED_SRC_ON 0xf /* Tied low */ + + +/* + * BCM5482: Shadow registers + * Shadow values go into bits [14:10] of register 0x1c to select a shadow + * register to access. + */ +/* 00101: Spare Control Register 3 */ +#define BCM54XX_SHD_SCR3 0x05 +#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 +#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 +#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 + +/* 01010: Auto Power-Down */ +#define BCM54XX_SHD_APD 0x0a +#define BCM54XX_SHD_APD_EN 0x0020 + +#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ + /* LED3 / ~LINKSPD[2] selector */ +#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) + /* LED1 / ~LINKSPD[1] selector */ +#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) +#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ +#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ +#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ +#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ +#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ +#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ + + +/* + * EXPANSION SHADOW ACCESS REGISTERS. 
(PHY REG 0x15, 0x16, and 0x17) + */ +#define MII_BCM54XX_EXP_AADJ1CH0 0x001f +#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 +#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 +#define MII_BCM54XX_EXP_AADJ1CH3 0x601f +#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 +#define MII_BCM54XX_EXP_EXP08 0x0F08 +#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 +#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 +#define MII_BCM54XX_EXP_EXP75 0x0f75 +#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c +#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 +#define MII_BCM54XX_EXP_EXP96 0x0f96 +#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 +#define MII_BCM54XX_EXP_EXP97 0x0f97 +#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c + +/* + * BCM5482: Secondary SerDes registers + */ +#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ +#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ +#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ +#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ +#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ + + +/*****************************************************************************/ +/* Fast Ethernet Transceiver definitions. */ +/*****************************************************************************/ + +#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ +#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ +#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ +#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ +#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ +#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ + +#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ +#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ + + +/*** Shadow register definitions ***/ + +#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ +#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ + +#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ +#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 +#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 + +#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ +#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 705314797b8b997554b7e9d0ea7b65a497356e53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:40 -0700 Subject: net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h The shadow register 0x1C is used both by the BCM54xxx PHYs and the BCM7xxx internal PHYs, move the accessors to a common location so both drivers can use them. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 18 ------------------ include/linux/brcmphy.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 6001d76d8676..854f2c9a7b2b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -29,24 +29,6 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. 
- */ -static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - /* Indirect register access functions for the Expansion Registers */ static int bcm54xx_exp_read(struct phy_device *phydev, u16 regnum) { diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index be31bf9f60c2..722cf26567fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -195,4 +195,24 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ +/* + * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T + * 0x1c shadow registers. + */ +static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) +{ + phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); + return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); +} + +static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, + u16 val) +{ + return phy_write(phydev, MII_BCM54XX_SHD, + MII_BCM54XX_SHD_WRITE | + MII_BCM54XX_SHD_VAL(shadow) | + MII_BCM54XX_SHD_DATA(val)); +} + + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 66ce7fb9807b036058aa380bfd2b3851ae25ce39 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:43 -0700 Subject: net: phy: export phy_{read,write}_mmd_indirect Some PHY drivers might need to access Clause 45 registers in Clause 22 compatibility mode to, e.g., properly advertise EEE support when it is disabled by default. Export these two helper functions, phy_read_mmd_indirect() and phy_write_mmd_indirect(), for drivers to use. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 6 ++++-- include/linux/phy.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c94e2a27446a..e7a5893f32ff 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -955,7 +955,7 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Read reg 14 // Read MMD data */ -static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr) { struct phy_driver *phydrv = phydev->drv; @@ -971,6 +971,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, } return value; } +EXPORT_SYMBOL(phy_read_mmd_indirect); /** * phy_write_mmd_indirect - writes data to the MMD registers @@ -988,7 +989,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Write reg 14 // Write MMD data */ -static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr, u32 data) { struct phy_driver *phydrv = phydev->drv; @@ -1002,6 +1003,7 @@ static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data); } } +EXPORT_SYMBOL(phy_write_mmd_indirect); /** * phy_init_eee - init and check the EEE feature diff --git a/include/linux/phy.h b/include/linux/phy.h index ed39956b5613..d090cfcaa167 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -597,6 +597,19 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); } +/** + * phy_read_mmd_indirect - reads data from the MMD registers + * @phydev: The PHY device bus + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * + * Description: it reads data from the MMD registers (clause 22 to access to + * clause 45) of the specified phy address. + */ +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr); + /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -668,6 +681,20 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, return mdiobus_write(phydev->bus, phydev->addr, regnum, val); } +/** + * phy_write_mmd_indirect - writes data to the MMD registers + * @phydev: The PHY device + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * @data: data to write in the MMD register + * + * Description: Write data from the MMD registers of the specified + * phy address. + */ +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr, u32 data); + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -- cgit v1.2.3 From b8f9a02924bbeb0c46ca4c19561cbe765b80e264 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:45 -0700 Subject: net: phy: bcm7xxx: enable EEE at the PHY level The 28nm Gigabit PHY on BCM7xxx chips comes out of reset with absolutely no EEE capabilities, such that we would actually return that we do not support EEE when accessing 3.20 (MDIO_PCS_EEE_ABLE) registers. 
Poke through the vendor-specific C45 register to enable EEE globally at the PHY level, and advertise supported EEE modes. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 31 +++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 3 +++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 29e256a4ed57..e98c510b75c5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Broadcom BCM7xxx internal PHY registers */ #define MII_BCM7XXX_CHANNEL_WIDTH 0x2000 @@ -167,6 +168,32 @@ static int bcm7xxx_apd_enable(struct phy_device *phydev) return bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val); } +static int bcm7xxx_eee_enable(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr); + if (val < 0) + return val; + + /* Enable general EEE feature at the PHY level */ + val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; + + phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr, val); + + /* Advertise supported modes */ + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr); + + val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); + phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr, val); + + return 0; +} + static int bcm7xxx_28nm_config_init(struct phy_device *phydev) { int ret; @@ -179,6 +206,10 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) if (ret) return ret; + ret = bcm7xxx_eee_enable(phydev); + if (ret) + return ret; + return bcm7xxx_apd_enable(phydev); } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 722cf26567fa..ee1431d976fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -214,5 +214,8 @@ static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, MII_BCM54XX_SHD_DATA(val)); } +#define BRCM_CL45VEN_EEE_CONTROL 0x803d +#define LPI_FEATURE_EN 0x8000 +#define LPI_FEATURE_EN_DIG1000X 0x4000 #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 690e36e726d00d2528bc569809048adf61550d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 23 Aug 2014 12:13:41 -0700 Subject: net: Allow raw buffers to be passed into the flow dissector. Drivers, and perhaps other entities we have not yet considered, sometimes want to know how deep the protocol headers go before deciding how large of an SKB to allocate and how much of the packet to place into the linear SKB area. For example, consider a driver which has a device which DMAs into pools of pages and then tells the driver where the data went in the DMA descriptor(s). The driver can then build an SKB and reference most of the data via SKB fragments (which are page/offset/length triplets). However at least some of the front of the packet should be placed into the linear SKB area, which comes before the fragments, so that packet processing can get at the headers efficiently. The first thing each protocol layer is going to do is a "pskb_may_pull()" so we might as well aggregate as much of this as possible while we're building the SKB in the driver. Part of supporting this is that we don't have an SKB yet, so we want to be able to let the flow dissector operate on a raw buffer in order to compute the offset of the end of the headers. 
So now we have a __skb_flow_dissect() which takes an explicit data pointer and length. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++------ include/net/flow_keys.h | 14 ++++++++++++-- net/core/flow_dissector.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abde271c18ae..18ddf9684a27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2567,20 +2567,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { - int hlen = skb_headlen(skb); - if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 6667a054763a..4040f63932c5 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -27,7 +27,17 @@ struct flow_keys { u8 ip_proto; }; -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow); -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto); +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, int hlen); +static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0); +} +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} u32 flow_hash_from_keys(struct flow_keys *keys); #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5f362c1d0332..660c6492fb78 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -34,29 +34,40 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen) { int poff = proto_ports_offset(ip_proto); + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + if (poff >= 0) { __be32 *ports, _ports; - ports = skb_header_pointer(skb, thoff + poff, - sizeof(_ports), &_ports); + ports = __skb_header_pointer(skb, thoff + poff, + sizeof(_ports), data, hlen, &_ports); if (ports) return *ports; } return 0; } -EXPORT_SYMBOL(skb_flow_get_ports); +EXPORT_SYMBOL(__skb_flow_get_ports); -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +bool 
__skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, int hlen) { int nhoff = skb_network_offset(skb); u8 ip_proto; __be16 proto = skb->protocol; + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + memset(flow, 0, sizeof(*flow)); again: @@ -65,7 +76,7 @@ again: const struct iphdr *iph; struct iphdr _iph; ip: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph || iph->ihl < 5) return false; nhoff += iph->ihl * 4; @@ -83,7 +94,7 @@ ip: __be32 flow_label; ipv6: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph) return false; @@ -113,7 +124,7 @@ ipv6: const struct vlan_hdr *vlan; struct vlan_hdr _vlan; - vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) return false; @@ -126,7 +137,7 @@ ipv6: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; proto = hdr->proto; @@ -151,7 +162,7 @@ ipv6: __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; /* @@ -171,8 +182,9 @@ ipv6: const struct ethhdr *eth; struct ethhdr _eth; - eth = skb_header_pointer(skb, nhoff, - sizeof(_eth), &_eth); + eth = __skb_header_pointer(skb, nhoff, + sizeof(_eth), + data, hlen, &_eth); if (!eth) return false; proto = eth->h_proto; @@ -194,12 +206,12 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; return true; } -EXPORT_SYMBOL(skb_flow_dissect); +EXPORT_SYMBOL(__skb_flow_dissect); static u32 hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) -- cgit v1.2.3 From 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Aug 2014 11:48:26 -0400 Subject: perf: Add PERF_EVENT_STATE_EXIT state for events with exited task Adding a new perf event state to indicate that the monitored task has exited. In this case the event stays alive until the owner task exits or closes the event fd, while still providing the last data through the read syscall and ring buffer. The event also needs to propagate the error info (the monitored task has died) via the poll and read syscalls, by returning POLLHUP and 0 respectively. 
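To illustrate the new user-visible semantics, a minimal (hypothetical) user-space monitor could look like the sketch below; the fd is assumed to come from an earlier perf_event_open() call, and error handling is trimmed:

#include <poll.h>
#include <stdint.h>
#include <unistd.h>

/* Wait until the monitored task dies, then consume the final read.
 * With this patch, poll() reports POLLHUP and read() returns 0 once
 * the event has entered PERF_EVENT_STATE_EXIT. */
static void wait_for_task_exit(int perf_fd)
{
	struct pollfd pfd = { .fd = perf_fd, .events = POLLIN };
	uint64_t value;

	while (poll(&pfd, 1, -1) >= 0) {
		if (pfd.revents & POLLHUP) {
			ssize_t n = read(perf_fd, &value, sizeof(value));
			/* n == 0 here signals "monitored task has died" */
			(void)n;
			break;
		}
		/* POLLIN: normal event data available, handled elsewhere */
	}
}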
Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140811120102.GY9918@twins.programming.kicks-ass.net Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t5y3w8jjx6tfo5w8y6oajsjq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + kernel/events/core.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f0a1036b1911..893a0d07986f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -269,6 +269,7 @@ struct pmu { * enum perf_event_active_state - the states of a event */ enum perf_event_active_state { + PERF_EVENT_STATE_EXIT = -3, PERF_EVENT_STATE_ERROR = -2, PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index 4575dd6e59ea..d8cb4d21a346 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3600,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if (event->state == PERF_EVENT_STATE_ERROR) + if ((event->state == PERF_EVENT_STATE_ERROR) || + (event->state == PERF_EVENT_STATE_EXIT)) return 0; if (count < event->read_size) @@ -3630,6 +3631,10 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) unsigned int events = POLLHUP; poll_wait(file, &event->waitq, wait); + + if (event->state == PERF_EVENT_STATE_EXIT) + return events; + /* * Pin the event->rb by taking event->mmap_mutex; otherwise * perf_event_set_output() can swizzle our rb and make us miss wakeups. @@ -7588,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event, if (child_event->parent) { sync_child_event(child_event, child); free_event(child_event); + } else { + child_event->state = PERF_EVENT_STATE_EXIT; + perf_event_wakeup(child_event); } } -- cgit v1.2.3 From 1b05756c48ea07ced9604ef01d11194d936da163 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 Aug 2014 22:02:34 +0200 Subject: netfilter: ipset: Fix warn: integer overflows 'sizeof(*map) + size * set->dsize' Dan Carpenter reported that the static checker emits the warning net/netfilter/ipset/ip_set_list_set.c:600 init_list_set() warn: integer overflows 'sizeof(*map) + size * set->dsize' Limit the maximal number of elements in list type of sets. 
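For illustration only, the wrap-around is easy to reproduce in a user-space toy program; the element count and sizes below are made up, the point is that the 32-bit product silently overflows:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size  = 0x10000000;         /* user-controlled element count */
	uint32_t dsize = 64;                 /* per-element size */
	uint32_t bytes = 16 + size * dsize;  /* 2^28 * 64 = 2^34 wraps to 0 */

	/* Prints "16 bytes" even though ~16 GiB were requested, so the
	 * subsequent allocation would be far too small. Clamping the
	 * element count, as min_t(u32, size, IP_SET_LIST_MAX_SIZE) does
	 * above, keeps the product from ever wrapping. */
	printf("%u elements -> %u bytes\n", size, bytes);
	return 0;
}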
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_list.h | 1 + net/netfilter/ipset/ip_set_list_set.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h index 68c2aea897f5..fe2622a00151 100644 --- a/include/linux/netfilter/ipset/ip_set_list.h +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -6,5 +6,6 @@ #define IP_SET_LIST_DEFAULT_SIZE 8 #define IP_SET_LIST_MIN_SIZE 4 +#define IP_SET_LIST_MAX_SIZE 65536 #endif /* __IP_SET_LIST_H */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 3e2317f3cf68..f87adbad6076 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -597,7 +597,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + + min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize, + GFP_KERNEL); if (!map) return false; -- cgit v1.2.3 From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:33:47 -0700 Subject: net: skb_gro_checksum_* functions Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check, and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete. These are the cognates of the normal checksum functions but are used in the gro_receive path and operate on GRO related fields in sk_buffs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++++++++++-- net/core/dev.c | 34 ++++++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e2b0b8b5cd7..eb73444e1bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,7 +1883,13 @@ struct napi_gro_cb { u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number encapsulation layers crossed */ + u8 encapsulation; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. 
+ */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + (skb->ip_summed != CHECKSUM_UNNECESSARY || + (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level + * checksum or an encapsulated one during GRO. This saves work + * if we fallback to normal path with the packet. + */ +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (NAPI_GRO_CB(skb)->encapsulation) + skb->encapsulation = 1; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->encapsulation = 0; + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/net/core/dev.c b/net/core/dev.c index 1421dad4cb29..b6a718ec11c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + } else { + NAPI_GRO_CB(skb)->csum_valid = 0; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encapsulation = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. 
+ */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. -- cgit v1.2.3 From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Aug 2014 17:03:28 +0200 Subject: random32: improvements to prandom_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a couple of minor items, mostly addressing prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t for length arguments, 2) We can use put_unaligned() when filling the array instead of open coding it [ perhaps some archs will further benefit from their own arch specific implementation when GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned int as type for getting the arch seed, 5) Make use of prandom_u32_max() for timer slack. Regarding the change to put_unaligned(), callers of prandom_bytes(), which internally invokes prandom_bytes_state(), aren't affected, as they expect the array to be filled randomly and have no control over the internal state whatsoever (that's also why we have periodic reseeding there, etc), so they really don't care. Now for the direct callers of prandom_bytes_state(), which are solely located in test cases for MTD devices, that is, drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}: These tests basically fill a test write-vector through prandom_bytes_state() with an a-priori defined seed each time and write that to a MTD device. Later on, they set up a read-vector and read back those blocks from the device. So in the verification phase, the write-vector is being re-setup [ so same seed and prandom_bytes_state() called ], and then memcmp()'ed against the read-vector to check if the data is the same. Akinobu, Lothar and I also tested this patch and it runs through the 3 relevant MTD test cases w/o any errors on the nandsim device (simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53 and i.MX6): # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \ third_id_byte=0x00 fourth_id_byte=0x15 # modprobe mtd_oobtest dev=0 # modprobe mtd_pagetest dev=0 # modprobe mtd_subpagetest dev=0 We also don't have any users depending directly on a particular result of the PRNG (except the PRNG self-test itself), and that's just fine as it e.g. easily allowed us to do things like upgrading from taus88 to taus113. Signed-off-by: Daniel Borkmann Tested-by: Akinobu Mita Tested-by: Lothar Waßmann Cc: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/linux/random.h | 4 ++-- lib/random32.c | 39 ++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 57fbbffd77a0..b05856e16b75 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -26,7 +26,7 @@ unsigned int get_random_int(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 prandom_u32(void); -void prandom_bytes(void *buf, int nbytes); +void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); @@ -35,7 +35,7 @@ struct rnd_state { }; u32 prandom_u32_state(struct rnd_state *state); -void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) diff --git a/lib/random32.c b/lib/random32.c index c9b6bf3afe0c..0bee183fa18f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32); * This is used for pseudo-randomness with no outside seeding. * For more random results, use prandom_bytes(). */ -void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { - unsigned char *p = buf; - int i; - - for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { - u32 random = prandom_u32_state(state); - int j; + u8 *ptr = buf; - for (j = 0; j < sizeof(u32); j++) { - p[i + j] = random; - random >>= BITS_PER_BYTE; - } + while (bytes >= sizeof(u32)) { + put_unaligned(prandom_u32_state(state), (u32 *) ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); } - if (i < bytes) { - u32 random = prandom_u32_state(state); - for (; i < bytes; i++) { - p[i] = random; - random >>= BITS_PER_BYTE; - } + if (bytes > 0) { + u32 rem = prandom_u32_state(state); + do { + *ptr++ = (u8) rem; + bytes--; + rem >>= BITS_PER_BYTE; + } while (bytes > 0); } } EXPORT_SYMBOL(prandom_bytes_state); @@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state); * @buf: where to copy the pseudo-random bytes to * @bytes: the requested number of bytes */ -void prandom_bytes(void *buf, int bytes) +void prandom_bytes(void *buf, size_t bytes) { struct rnd_state *state = &get_cpu_var(net_rand_state); @@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes); static void prandom_warmup(struct rnd_state *state) { - /* Calling RNG ten times to satify recurrence condition */ + /* Calling RNG ten times to satisfy recurrence condition */ prandom_u32_state(state); prandom_u32_state(state); prandom_u32_state(state); @@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state) static u32 __extract_hwseed(void) { - u32 val = 0; + unsigned int val = 0; (void)(arch_get_random_seed_int(&val) || arch_get_random_int(&val)); @@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare) prandom_seed(entropy); /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + (prandom_u32() % 40); + expires = 40 + prandom_u32_max(40); seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); add_timer(&seed_timer); -- cgit v1.2.3 From 4798248e4e023170e937a65a1d30fcc52496dd42 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Fri, 22 Aug 2014 16:21:53 -0700 Subject: net: Add ops->ndo_xmit_flush() Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- drivers/usb/gadget/function/f_ncm.c | 2 +- include/linux/netdevice.h | 35 +++++++++++++++++++++++++++++++++++ net/atm/mpc.c | 2 +- net/core/dev.c | 5 ++--- net/core/netpoll.c | 3 +-- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 3 +-- net/sched/sch_teql.c | 3 +-- 9 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 43c9960dce1c..81b22a180aad 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) struct dlci_local *dlp = netdev_priv(dev); if (skb) - dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave); + netdev_start_xmit(skb, dlp->slave); return NETDEV_TX_OK; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bcdc882cd415..cb5d646db6a7 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data) /* Only send if data is available. */ if (ncm->skb_tx_data) { ncm->timer_force_tx = true; - ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev); + netdev_start_xmit(NULL, ncm->netdev); ncm->timer_force_tx = false; } } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eb73444e1bd0..220c50984688 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * + * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); + * A driver implements this function when it wishes to support + * deferred TX queue flushing. The idea is that the expensive + * operation to trigger TX queue processing can be done after + * N calls to ndo_start_xmit rather than being done every single + * time. In this regime ndo_start_xmit will be called one or more + * times, and then a final ndo_xmit_flush call will be made to + * have the driver tell the device about the new pending TX queue + * entries. The kernel keeps track of which queues need flushing + * by monitoring skb->queue_mapping of the packets it submits to + * ndo_start_xmit. This is the queue value that will be passed + * to ndo_xmit_flush. 
+ * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1005,6 +1018,7 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); + void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3430,6 +3444,27 @@ int __init dev_proc_init(void); #define dev_proc_init() 0 #endif +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev) +{ + netdev_tx_t ret; + u16 q; + + q = skb->queue_mapping; + ret = ops->ndo_start_xmit(skb, dev); + if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) + ops->ndo_xmit_flush(dev, q); + + return ret; +} + +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + return __netdev_start_xmit(ops, skb, dev); +} + int netdev_class_create_file_ns(struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(struct class_attribute *class_attr, diff --git a/net/atm/mpc.c b/net/atm/mpc.c index e8e0e7a8a23d..d662da161e5a 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return mpc->old_ops->ndo_start_xmit(skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index b6a718ec11c1..26d296c2447c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; unsigned int skb_len; @@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = ops->ndo_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); @@ -2686,7 +2685,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = ops->ndo_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 907fb5e36c02..a5ad06828d67 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644); static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int status = NETDEV_TX_OK; netdev_features_t features; @@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = ops->ndo_start_xmit(skb, dev); + status = netdev_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b849ddfef2e..83e2b4b19eb7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev 
*pkt_dev) { struct net_device *odev = pkt_dev->odev; - netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) - = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; u16 queue_map; int ret; @@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 93896d2092f6..0dfa990d4eaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = ops->ndo_start_xmit(skb, dev); + ret = netdev_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) txq_trans_update(txq); } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bd33793b527e..64cd93ca8104 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -301,7 +301,6 @@ restart: do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - const struct net_device_ops *slave_ops = slave->netdev_ops; if (slave_txq->qdisc_sleeping != q) continue; @@ -317,7 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3 From 2ee507c472939db4b146d545352b8a7c79ef47f8 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 31 Jul 2014 10:29:48 -0700 Subject: sched: Add function single_task_running to let a task check if it is the only task running on a cpu This function will help an async task processing batched jobs from a workqueue decide if it wants to keep processing more chunks of batched work that can be delayed, or to accumulate more work for more efficient batched processing later. If no other tasks are running on the cpu, the batching process can take advantage of the available cpu cycles and decide to continue processing the existing accumulated work to minimize delay; otherwise it will yield. 
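A sketch of the intended usage pattern follows; struct my_batch, batch_has_work() and process_one_chunk() are hypothetical placeholders, only single_task_running() and cond_resched() are real kernel interfaces:

#include <linux/sched.h>

struct my_batch;
extern bool batch_has_work(struct my_batch *batch);
extern void process_one_chunk(struct my_batch *batch);

/* Hypothetical batch consumer: keep draining deferred work only while
 * this task has the CPU to itself; otherwise stop and let the remaining
 * chunks be processed later. */
static void drain_deferred_work(struct my_batch *batch)
{
	while (batch_has_work(batch)) {
		process_one_chunk(batch);

		if (!single_task_running())
			break;		/* someone else wants the CPU */

		cond_resched();
	}
}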
Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- include/linux/sched.h | 1 + kernel/sched/core.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..e6d2c056d8e0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,6 +167,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec1a286684a5..59965ec0b7de 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2366,6 +2366,18 @@ unsigned long nr_running(void) return sum; } +/* + * Check if only the current task is running on the cpu. + */ +bool single_task_running(void) +{ + if (cpu_rq(smp_processor_id())->nr_running == 1) + return true; + else + return false; +} +EXPORT_SYMBOL(single_task_running); + unsigned long long nr_context_switches(void) { int i; -- cgit v1.2.3 From 48ea526a6877d605c961aa37fae33f3227b29424 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 25 Aug 2014 16:06:53 +0300 Subject: net/mlx4: Use is_kdump_kernel() to detect kdump kernel Use is_kdump_kernel() to detect kdump kernel, instead of reset_devices. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 071f6b234604..783dd099abd1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -1275,7 +1276,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) { - return reset_devices; + return is_kdump_kernel(); } #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 0b725a2ca61bedc33a2a63d0451d528b268cf975 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Aug 2014 15:51:53 -0700 Subject: net: Remove ndo_xmit_flush netdev operation, use signalling instead. As reported by Jesper Dangaard Brouer, for high packet rates the overhead of having another indirect call in the TX path is non-trivial. There is the indirect call itself, and then there is all of the reloading of the state to refetch the tail pointer value and then write the device register. Move to a more passive scheme, which requires very light modifications to the device drivers. The signal is a new skb->xmit_more value, if it is non-zero it means that more SKBs are pending to be transmitted on the same queue as the current SKB. And therefore, the driver may elide the tail pointer update. Right now skb->xmit_more is always zero. Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 36 +++++++++++-------------------- drivers/net/virtio_net.c | 12 +++-------- include/linux/netdevice.h | 25 ++------------------- include/linux/skbuff.h | 2 ++ 4 files changed, 19 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b9c020a05fb8..89c29b40d61c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long); static void igb_watchdog(unsigned long); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static void igb_xmit_flush(struct net_device *netdev, u16 queue); static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); @@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, .ndo_start_xmit = igb_xmit_frame, - .ndo_xmit_flush = igb_xmit_flush, .ndo_get_stats64 = igb_get_stats64, .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, @@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_ring->next_to_use = i; + if (!skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } return; dma_error: @@ -5052,20 +5058,17 @@ out_drop: return NETDEV_TX_OK; } -static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx) +static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, + struct sk_buff *skb) { + unsigned int r_idx = skb->queue_mapping; + if (r_idx >= adapter->num_tx_queues) r_idx = r_idx % adapter->num_tx_queues; return adapter->tx_ring[r_idx]; } -static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, - struct sk_buff *skb) -{ - return __igb_tx_queue_mapping(adapter, skb->queue_mapping); -} - static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } -static void igb_xmit_flush(struct net_device *netdev, u16 queue) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct igb_ring *tx_ring; - - tx_ring = __igb_tx_queue_mapping(adapter, queue); - - writel(tx_ring->next_to_use, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); -} - /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 62421086d3e6..f0c2824f5e0f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -953,15 +953,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) } } - return NETDEV_TX_OK; -} + if (!skb->xmit_more) + virtqueue_kick(sq->vq); -static void xmit_flush(struct net_device *dev, u16 qnum) -{ - struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq = &vi->sq[qnum]; - - virtqueue_kick(sq->vq); + return NETDEV_TX_OK; } /* @@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = { 
.ndo_open = virtnet_open, .ndo_stop = virtnet_close, .ndo_start_xmit = start_xmit, - .ndo_xmit_flush = xmit_flush, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 220c50984688..039b23786c22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * - * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); - * A driver implements this function when it wishes to support - * deferred TX queue flushing. The idea is that the expensive - * operation to trigger TX queue processing can be done after - * N calls to ndo_start_xmit rather than being done every single - * time. In this regime ndo_start_xmit will be called one or more - * times, and then a final ndo_xmit_flush call will be made to - * have the driver tell the device about the new pending TX queue - * entries. The kernel keeps track of which queues need flushing - * by monitoring skb->queue_mapping of the packets it submits to - * ndo_start_xmit. This is the queue value that will be passed - * to ndo_xmit_flush. - * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1018,7 +1005,6 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); - void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3447,15 +3433,8 @@ int __init dev_proc_init(void); static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev) { - netdev_tx_t ret; - u16 q; - - q = skb->queue_mapping; - ret = ops->ndo_start_xmit(skb, dev); - if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) - ops->ndo_xmit_flush(dev, q); - - return ret; + skb->xmit_more = 0; + return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18ddf9684a27..9b3802a197a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -558,6 +559,7 @@ struct sk_buff { __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); + __u8 xmit_more:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif -- cgit v1.2.3 From b4bbb107d73bbc0d92c9ae7fd8e69580aa9381e7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 27 Jun 2014 11:56:58 +0200 Subject: dma-mapping: Provide write-combine allocations Provide an implementation for dma_{alloc,free,mmap}_writecombine() when the architecture supports DMA attributes. 
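A typical consumer would look roughly like the sketch below; the device pointer, buffer size and surrounding driver glue are assumptions, not part of this patch:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>

#define MY_BUF_SIZE	(1024 * 1024)	/* illustrative size */

static void *wc_cpu_addr;
static dma_addr_t wc_dma_addr;

static int my_alloc_wc_buffer(struct device *dev)
{
	/* Returns a kernel mapping with write-combine semantics where the
	 * architecture implements DMA attributes. */
	wc_cpu_addr = dma_alloc_writecombine(dev, MY_BUF_SIZE,
					     &wc_dma_addr, GFP_KERNEL);
	return wc_cpu_addr ? 0 : -ENOMEM;
}

static void my_free_wc_buffer(struct device *dev)
{
	dma_free_writecombine(dev, MY_BUF_SIZE, wc_cpu_addr, wc_dma_addr);
}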
Signed-off-by: Thierry Reding Acked-by: Arnd Bergmann Signed-off-by: Marek Szyprowski --- arch/arm/include/asm/dma-mapping.h | 16 ---------------- include/asm-generic/dma-mapping-common.h | 8 -------- include/linux/dma-mapping.h | 26 ++++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c45b61a4b4a5..85738b200023 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -265,22 +265,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs); -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); -} - -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); -} - /* * This can be called during early boot to increase the size of the atomic * coherent DMA pool above the default value of 256KiB. It must be called diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index de8bf89940f8..d137431bf26f 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -205,14 +205,6 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) -static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); -} - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 931b70986272..d5d388160f42 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -263,6 +263,32 @@ struct dma_attrs; #define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ dma_unmap_sg(dev, sgl, nents, dir) +#else +static inline void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs); +} + +static inline void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs); +} + +static inline int dma_mmap_writecombine(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); +} #endif /* CONFIG_HAVE_DMA_ATTRS */ #ifdef CONFIG_NEED_DMA_MAP_STATE -- cgit v1.2.3 From 50d8a189013cef83eef771c45787cee68ecdf8fe Mon Sep 17 00:00:00 2001 From: "Raymond L. 
Rivera" Date: Thu, 24 Jul 2014 02:39:45 -0700 Subject: linux/pagemap.h: Fixed a typo in a code comment. Corrected a minor typo in a code comment where 'be' was missing. Signed-off-by: Raymond L. Rivera Signed-off-by: Jiri Kosina --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1474ae18c88..bf657ff3208c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -96,7 +96,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) } /* - * The page cache can done in larger chunks than + * The page cache can be done in larger chunks than * one page, because it allows for more efficient * throughput (it can then be mapped into user * space in smaller chunks for same flexibility). -- cgit v1.2.3 From 170fd0b1f6108b48df4369afa0ee29a83e922748 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 30 Jul 2014 14:36:18 +0300 Subject: ieee80211: Support parsing TPC report element in action frames TPC report element is contained in spectrum management's tpc report action frames and in radio measurement's link measurement report action frames. Add a function which checks whether an action frame contains this element. This may be needed by the drivers in order to set the correct tx power value in these frames. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 63ab3873c5ed..8018c915ee63 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -838,6 +838,16 @@ enum ieee80211_vht_opmode_bits { #define WLAN_SA_QUERY_TR_ID_LEN 2 +/** + * struct ieee80211_tpc_report_ie + * + * This structure refers to "TPC Report element" + */ +struct ieee80211_tpc_report_ie { + u8 tx_power; + u8 link_margin; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -973,6 +983,13 @@ struct ieee80211_mgmt { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; + struct { + u8 action_code; + u8 dialog_token; + u8 tpc_elem_id; + u8 tpc_elem_length; + struct ieee80211_tpc_report_ie tpc; + } __packed tpc_report; } u; } __packed action; } u; @@ -1865,6 +1882,7 @@ enum ieee80211_category { WLAN_CATEGORY_DLS = 2, WLAN_CATEGORY_BACK = 3, WLAN_CATEGORY_PUBLIC = 4, + WLAN_CATEGORY_RADIO_MEASUREMENT = 5, WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, @@ -2378,4 +2396,51 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) +/** + * ieee80211_action_contains_tpc - checks if the frame contains TPC element + * @skb: the skb containing the frame, length will be checked + * + * This function checks if it's either TPC report action frame or Link + * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 + * and 8.5.7.5 accordingly. 
+ */ +static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (!ieee80211_is_action(mgmt->frame_control)) + return false; + + if (skb->len < IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.tpc_report)) + return false; + + /* + * TPC report - check that: + * category = 0 (Spectrum Management) or 5 (Radio Measurement) + * spectrum management action = 3 (TPC/Link Measurement report) + * TPC report EID = 35 + * TPC report element length = 2 + * + * The spectrum management's tpc_report struct is used here both for + * parsing tpc_report and radio measurement's link measurement report + * frame, since the relevant part is identical in both frames. + */ + if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT && + mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT) + return false; + + /* both spectrum mgmt and link measurement have same action code */ + if (mgmt->u.action.u.tpc_report.action_code != + WLAN_ACTION_SPCT_TPC_RPRT) + return false; + + if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || + mgmt->u.action.u.tpc_report.tpc_elem_length != + sizeof(struct ieee80211_tpc_report_ie)) + return false; + + return true; +} + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:27 -0500 Subject: scheduler: Replace __get_cpu_var with this_cpu_ptr Convert all uses of __get_cpu_var for address calculation to use this_cpu_ptr instead. [Uses of __get_cpu_var with cpumask_var_t are no longer handled by this patch] Cc: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kernel_stat.h | 4 ++-- kernel/events/callchain.c | 4 ++-- kernel/events/core.c | 24 ++++++++++++------------ kernel/sched/sched.h | 4 ++-- kernel/taskstats.c | 2 +- kernel/time/tick-sched.c | 4 ++-- kernel/user-return-notifier.c | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ecbc52f9ff77..8422b4ed6882 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); /* Must have preemption disabled for this to be meaningful. 
*/ -#define kstat_this_cpu (&__get_cpu_var(kstat)) -#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_this_cpu this_cpu_ptr(&kstat) +#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat) #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 97b67df8fbfe..c4f63e68a35c 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -137,7 +137,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) int cpu; struct callchain_cpus_entries *entries; - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + *rctx = get_recursion_context(this_cpu_ptr(callchain_recursion)); if (*rctx == -1) return NULL; @@ -153,7 +153,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) static void put_callchain_entry(int rctx) { - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); + put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cf24b3e42ec..4d44e40a0483 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -239,7 +239,7 @@ static void perf_duration_warn(struct irq_work *w) u64 avg_local_sample_len; u64 local_samples_len; - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; printk_ratelimited(KERN_WARNING @@ -261,10 +261,10 @@ void perf_sample_event_took(u64 sample_len_ns) return; /* decay the counter by 1 average sample */ - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES; local_samples_len += sample_len_ns; - __get_cpu_var(running_sample_length) = local_samples_len; + __this_cpu_write(running_sample_length, local_samples_len); /* * note: this will be biased artifically low until we have @@ -877,7 +877,7 @@ static DEFINE_PER_CPU(struct list_head, rotation_list); static void perf_pmu_rotate_start(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); WARN_ON(!irqs_disabled()); @@ -2389,7 +2389,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_out(task, next); } @@ -2632,11 +2632,11 @@ void __perf_event_task_sched_in(struct task_struct *prev, * to check if we have to switch in PMU state. 
* cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); /* check for system-wide branch_stack events */ - if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) + if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) perf_branch_stack_sched_in(prev, task); } @@ -2891,7 +2891,7 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); struct perf_cpu_context *cpuctx, *tmp; struct perf_event_context *ctx; int throttled; @@ -5671,7 +5671,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct perf_event *event; struct hlist_head *head; @@ -5690,7 +5690,7 @@ end: int perf_swevent_get_recursion_context(void) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(swhash->recursion); } @@ -5698,7 +5698,7 @@ EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); inline void perf_swevent_put_recursion_context(int rctx) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(swhash->recursion, rctx); } @@ -5727,7 +5727,7 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 579712f4e9d5..77d92f8130e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -650,10 +650,10 @@ static inline int cpu_of(struct rq *rq) DECLARE_PER_CPU(struct rq, runqueues); #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -#define this_rq() (&__get_cpu_var(runqueues)) +#define this_rq() this_cpu_ptr(&runqueues) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -#define raw_rq() (&__raw_get_cpu_var(runqueues)) +#define raw_rq() raw_cpu_ptr(&runqueues) static inline u64 rq_clock(struct rq *rq) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 13d2f7cd65db..b312fcc73024 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -638,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = __this_cpu_ptr(&listener_array); + listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73f90932282b..3cadc112519f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -924,7 +924,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) */ void tick_nohz_idle_exit(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; local_irq_disable(); @@ -1041,7 +1041,7 @@ static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) static inline void tick_nohz_irq_enter(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + 
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 394f70b17162..9586b670a5b2 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -14,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); void user_return_notifier_register(struct user_return_notifier *urn) { set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); - hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list)); + hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list)); } EXPORT_SYMBOL_GPL(user_return_notifier_register); @@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); void user_return_notifier_unregister(struct user_return_notifier *urn) { hlist_del(&urn->link); - if (hlist_empty(&__get_cpu_var(return_notifier_list))) + if (hlist_empty(this_cpu_ptr(&return_notifier_list))) clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); } EXPORT_SYMBOL_GPL(user_return_notifier_unregister); -- cgit v1.2.3 From 47405a253da4d8ca4b18ad537423083fdd790440 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:56 -0500 Subject: percpu: Remove __this_cpu_ptr The __this_cpu_ptr macro is no longer in use so drop it. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu-defs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfd56046ecec..420032d41d27 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -257,9 +257,6 @@ do { \ #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -/* keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) - /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. -- cgit v1.2.3 From bf3baca6c54ce8a2f51687296f868dfe20d33f13 Mon Sep 17 00:00:00 2001 From: James Ban Date: Wed, 27 Aug 2014 11:47:07 +0900 Subject: regulator: da9211: support device tree Add device tree support for the DA9211/DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/da9211.txt | 63 ++++++++++++++++ drivers/regulator/da9211-regulator.c | 85 ++++++++++++++++++++-- include/linux/regulator/da9211.h | 2 +- 3 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/regulator/da9211.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt new file mode 100644 index 000000000000..240019a82f9a --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/da9211.txt @@ -0,0 +1,63 @@ +* Dialog Semiconductor DA9211/DA9213 Voltage Regulator +
+Required properties: +- compatible: "dlg,da9211" or "dlg,da9213". +- reg: I2C slave address, usually 0x68. +- interrupts: the interrupt outputs of the controller +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values listed below. The content of each sub-node is defined by the + standard binding for regulators; see regulator.txt. + BUCKA and BUCKB.
+ +Optional properties: +- Any optional property defined in regulator.txt + +Example 1) DA9211 + + pmic: da9211@68 { + compatible = "dlg,da9211"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + }; + }; + +Example 2) DA9213 + pmic: da9213@68 { + compatible = "dlg,da9213"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + }; + }; diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index a26f1d283c57..5aabbac1b524 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "da9211-regulator.h" @@ -236,6 +237,59 @@ static struct regulator_desc da9211_regulators[] = { DA9211_BUCK(BUCKB), }; +#ifdef CONFIG_OF +static struct of_regulator_match da9211_matches[] = { + [DA9211_ID_BUCKA] = { .name = "BUCKA" }, + [DA9211_ID_BUCKB] = { .name = "BUCKB" }, + }; + +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + struct da9211_pdata *pdata; + struct device_node *node; + int i, num, n; + + node = of_get_child_by_name(dev->of_node, "regulators"); + if (!node) { + dev_err(dev, "regulators node not found\n"); + return ERR_PTR(-ENODEV); + } + + num = of_regulator_match(dev, node, da9211_matches, + ARRAY_SIZE(da9211_matches)); + of_node_put(node); + if (num < 0) { + dev_err(dev, "Failed to match regulators\n"); + return ERR_PTR(-EINVAL); + } + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->num_buck = num; + + n = 0; + for (i = 0; i < ARRAY_SIZE(da9211_matches); i++) { + if (!da9211_matches[i].init_data) + continue; + + pdata->init_data[n] = da9211_matches[i].init_data; + + n++; + }; + + return pdata; +} +#else +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + static irqreturn_t da9211_irq_handler(int irq, void *data) { struct da9211 *chip = data; @@ -306,7 +360,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - config.init_data = &(chip->pdata->init_data[i]); + config.init_data = chip->pdata->init_data[i]; config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -332,6 +386,21 @@ static int da9211_regulator_init(struct da9211 *chip) return 0; } + +static const struct i2c_device_id da9211_i2c_id[] = { + {"da9211", DA9211}, + {"da9213", DA9213}, + {}, +}; + +#ifdef CONFIG_OF +static const struct of_device_id da9211_dt_ids[] = { + { .compatible = "dlg,da9211", .data = &da9211_i2c_id[0] }, + { .compatible = "dlg,da9213", .data = &da9211_i2c_id[1] }, + {}, +}; +#endif + /* * I2C driver interface functions */ @@ -377,6 +446,14 @@
static int da9211_i2c_probe(struct i2c_client *i2c, return -ENODEV; } + if (!chip->pdata) + chip->pdata = da9211_parse_regulators_dt(chip->dev); + + if (IS_ERR(chip->pdata)) { + dev_err(chip->dev, "No regulators defined for the platform\n"); + return PTR_ERR(chip->pdata); + } + chip->chip_irq = i2c->irq; if (chip->chip_irq != 0) { @@ -401,12 +478,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c, return ret; } -static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", DA9211}, - {"da9213", DA9213}, - {}, -}; - MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); static struct i2c_driver da9211_regulator_driver = { diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 658c3c33f4c0..5479394fefce 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,6 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; - struct regulator_init_data *init_data; + struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; #endif -- cgit v1.2.3 From 6a4c264313c4ae32dc53821a9c57e0dc9696fb81 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:21:23 +0930 Subject: module: rename KERNEL_PARAM_FL_NOARG to avoid confusion Make it clear this is about kernel_param_ops, not kernel_param (which will soon have a flags field of its own). No functional changes. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- kernel/module.c | 2 +- kernel/params.c | 6 +++--- security/apparmor/lsm.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 494f99e852da..16fdddab856a 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -42,7 +42,7 @@ struct kernel_param; * NOARG - the parameter allows for no argument (foo instead of foo=1) */ enum { - KERNEL_PARAM_FL_NOARG = (1 << 0) + KERNEL_PARAM_OPS_FL_NOARG = (1 << 0) }; struct kernel_param_ops { diff --git a/kernel/module.c b/kernel/module.c index 03214bd288e9..8a0dc91eddbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -135,7 +135,7 @@ static int param_set_bool_enable_only(const char *val, } static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool_enable_only, .get = param_get_bool, }; diff --git a/kernel/params.c b/kernel/params.c index 34f527023794..8a484fc8bde8 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -104,7 +104,7 @@ static int parse_one(char *param, return 0; /* No one handled NULL, so do it here. 
*/ if (!val && - !(params[i].ops->flags & KERNEL_PARAM_FL_NOARG)) + !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG)) return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); @@ -318,7 +318,7 @@ int param_get_bool(char *buffer, const struct kernel_param *kp) EXPORT_SYMBOL(param_get_bool); struct kernel_param_ops param_ops_bool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; @@ -369,7 +369,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp) EXPORT_SYMBOL(param_set_bint); struct kernel_param_ops param_ops_bint = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 998100093332..65ca451a764d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -668,7 +668,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool static struct kernel_param_ops param_ops_aabool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool }; @@ -685,7 +685,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool static struct kernel_param_ops param_ops_aalockpolicy = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy }; -- cgit v1.2.3 From 91f9d330cc14932084c37751997213cb0e7ea882 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:22:23 +0930 Subject: module: make it possible to have unsafe, tainting module params Add a flags field to struct kernel_param, and add the first flag: unsafe parameter. Modifying a kernel parameter with the unsafe flag set, either via the kernel command line or sysfs, will issue a warning and taint the kernel.
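As a hedged illustration of the intended use (the module_param_unsafe() helper shown here is only introduced by a later patch in this series, and the parameter name and description are made up):

	#include <linux/moduleparam.h>

	/* Debug-only override; setting it taints the kernel with TAINT_USER. */
	static bool force_unsupported;
	module_param_unsafe(force_unsupported, bool, 0644);
	MODULE_PARM_DESC(force_unsupported,
			 "Force-enable unsupported hardware paths (taints kernel)");

Booting with force_unsupported=1 or writing the sysfs node then logs "Setting dangerous option force_unsupported - tainting kernel" before the value is stored.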
Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- drivers/tty/serial/8250/8250_core.c | 2 +- include/linux/moduleparam.h | 44 +++++++++++++++++++++++++++++-------- kernel/params.c | 11 ++++++++++ 3 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..bd672948f2f1 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -3587,7 +3587,7 @@ static void __used s8250_options(void) #ifdef CONFIG_SERIAL_8250_RSA __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, &param_array_ops, .arr = &__param_arr_probe_rsa, - 0444, -1); + 0444, -1, 0); #endif } #else diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 16fdddab856a..1e3ffb839daa 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -56,11 +56,21 @@ struct kernel_param_ops { void (*free)(void *arg); }; +/* + * Flags available for kernel_param + * + * UNSAFE - the parameter is dangerous and setting it will taint the kernel + */ +enum { + KERNEL_PARAM_FL_UNSAFE = (1 << 0) +}; + struct kernel_param { const char *name; const struct kernel_param_ops *ops; u16 perm; - s16 level; + s8 level; + u8 flags; union { void *arg; const struct kparam_string *str; @@ -137,7 +147,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) /** * _param_cb - general callback for a module/cmdline parameter @@ -149,7 +159,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define __level_param_cb(name, ops, arg, perm, level) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level, 0) #define core_param_cb(name, ops, arg, perm) \ __level_param_cb(name, ops, arg, perm, 1) @@ -184,14 +194,14 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ -#define __module_param_call(prefix, name, ops, arg, perm, level) \ +#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? */ \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, { arg } } + level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ { 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1) + (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) /* We don't get oldget: it's often a new-style param_get_uint, etc.
*/ static inline int @@ -279,7 +289,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, &param_ops_##type, &var, perm, -1) + __module_param_call("", name, &param_ops_##type, &var, perm, -1, 0) #endif /* !MODULE */ /** @@ -297,7 +307,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ &param_ops_string, \ - .str = &__param_string_##name, perm, -1); \ + .str = &__param_string_##name, perm, -1, 0);\ __MODULE_PARM_TYPE(name, "string") /** @@ -346,6 +356,22 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } +/** + * param_check_unsafe - Warn and taint the kernel if setting dangerous options. + * + * This gets called from all the standard param setters, but can be used from + * custom setters as well. + */ +static inline void +param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); @@ -444,7 +470,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ &param_array_ops, \ .arr = &__param_arr_##name, \ - perm, -1); \ + perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/kernel/params.c b/kernel/params.c index 8a484fc8bde8..ad8d04563c3a 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -233,6 +233,7 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ + param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -265,6 +266,8 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } + param_check_unsafe(kp); + maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -302,6 +305,8 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { + param_check_unsafe(kp); + /* No equals means "set"... */ if (!val) val = "1"; @@ -331,6 +336,8 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; + param_check_unsafe(kp); + dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -357,6 +364,8 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; + param_check_unsafe(kp); + /* Match bool exactly, by re-using it.
*/ boolkp = *kp; boolkp.arg = &v; @@ -476,6 +485,8 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; + param_check_unsafe(kp); + if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); -- cgit v1.2.3 From 3baee201b06cfaff84c2c5ddc551b192bb3eaed3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:23:23 +0930 Subject: module: add module_param_unsafe and module_param_named_unsafe Add the helpers to be used by modules wishing to expose unsafe debugging or testing module parameters that taint the kernel when set. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1e3ffb839daa..9531f9f9729e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -122,6 +122,12 @@ struct kparam_array #define module_param(name, type, perm) \ module_param_named(name, name, type, perm) +/** + * module_param_unsafe - same as module_param but taints kernel + */ +#define module_param_unsafe(name, type, perm) \ + module_param_named_unsafe(name, name, type, perm) + /** * module_param_named - typesafe helper for a renamed module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -138,6 +144,14 @@ struct kparam_array module_param_cb(name, &param_ops_##type, &value, perm); \ __MODULE_PARM_TYPE(name, #type) +/** + * module_param_named_unsafe - same as module_param_named but taints kernel + */ +#define module_param_named_unsafe(name, value, type, perm) \ + param_check_##type(name, &(value)); \ + module_param_cb_unsafe(name, &param_ops_##type, &value, perm); \ + __MODULE_PARM_TYPE(name, #type) + /** * module_param_cb - general callback for a module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -149,6 +163,10 @@ struct kparam_array #define module_param_cb(name, ops, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) +#define module_param_cb_unsafe(name, ops, arg, perm) \ + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, \ + KERNEL_PARAM_FL_UNSAFE) + /** * _param_cb - general callback for a module/cmdline parameter * to be evaluated before certain initcall level -- cgit v1.2.3 From 7a486d3781295b5298cbf9556928a76d26896863 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 27 Aug 2014 06:25:23 +0930 Subject: param: check for tainting before calling set op. This means the check no longer has to be called from every set op, and it can move into params.c. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 16 ---------------- kernel/params.c | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9531f9f9729e..593501996574 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -374,22 +374,6 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -/** - * param_check_unsafe - Warn and taint the kernel if setting dangerous options.
- * - * This gets called from all the standard param setters, but can be used from - * custom setters as well. - */ -static inline void -param_check_unsafe(const struct kernel_param *kp) -{ - if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { - pr_warn("Setting dangerous option %s - tainting kernel\n", - kp->name); - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - } -} - extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); diff --git a/kernel/params.c b/kernel/params.c index ad8d04563c3a..041b5899d5e2 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -83,6 +83,15 @@ bool parameq(const char *a, const char *b) return parameqn(a, b, strlen(a)+1); } +static void param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + static int parse_one(char *param, char *val, const char *doing, @@ -109,6 +118,7 @@ static int parse_one(char *param, pr_debug("handling %s with %p\n", param, params[i].ops->set); mutex_lock(&param_lock); + param_check_unsafe(&params[i]); err = params[i].ops->set(val, &params[i]); mutex_unlock(&param_lock); return err; @@ -233,7 +243,6 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ - param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -266,8 +275,6 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } - param_check_unsafe(kp); - maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -305,8 +312,6 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { - param_check_unsafe(kp); - /* No equals means "set"... */ if (!val) val = "1"; @@ -336,8 +341,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; - param_check_unsafe(kp); - dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -364,8 +367,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; - param_check_unsafe(kp); - /* Match bool exactly, by re-using it.
*/ boolkp = *kp; boolkp.arg = &v; @@ -485,8 +486,6 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - param_check_unsafe(kp); - if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); @@ -563,6 +562,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, return -EPERM; mutex_lock(&param_lock); + param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); mutex_unlock(&param_lock); if (!err) -- cgit v1.2.3 From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we need to resolve a gfn to an hva given a pointer to a memslot. To avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..6d8a658ec174 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0817ee996e..76c92a7249c4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); -- cgit v1.2.3 From 5c21403d74af2c9cd635a34c2f9199681a5b813e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Aug 2014 14:39:04 -0700 Subject: net: Update sk_buff flag bit availability comment. We lost one when xmit_more was added. Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b3802a197a8..b69b7b512c06 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -580,7 +580,7 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 2/4 bit hole (depending on ndisc_nodetype presence) */ + /* 1/3 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL -- cgit v1.2.3 From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:46 -0700 Subject: net: dsa: reduce number of protocol hooks DSA is currently registering one packet_type function per EtherType it needs to intercept in the receive path of a DSA-enabled Ethernet device. Right now we have three of them: trailer, DSA and eDSA, and there might be more in the future; this will not scale to the addition of new protocols. This patch proceeds with adding a new layer of abstraction and two new functions: dsa_switch_rcv(), which will dispatch into the tag-protocol-specific receive function implemented by net/dsa/tag_*.c, and dsa_slave_xmit(), which will dispatch into the tag-protocol-specific transmit function implemented by net/dsa/tag_*.c. When we do create the per-port slave network devices, we iterate over the switch protocol to assign the DSA-specific receive and transmit operations. A new fake ethertype value is used: ETH_P_XDSA, to illustrate the fact that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER like it used to be. This allows us to greatly simplify the check in eth_type_trans() and always override the skb->protocol with ETH_P_XDSA for Ethernet switches' tagged protocols, while also reducing the number of repetitive slave netdevice_ops assignments. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 28 ++++++++++++--------------- include/net/dsa.h | 20 +++---------------- include/uapi/linux/if_ether.h | 1 + net/dsa/dsa.c | 39 ++++++++++++++++++++----------------- net/dsa/dsa_priv.h | 9 +++------ net/dsa/slave.c | 45 ++++++++++++++----------------------------- net/dsa/tag_dsa.c | 8 ++++---- net/dsa/tag_edsa.c | 8 ++++---- net/dsa/tag_trailer.c | 8 ++++---- net/ethernet/eth.c | 7 ++----- 10 files changed, 68 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 039b23786c22..1875dc71422a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } -static inline bool netdev_uses_dsa_tags(struct net_device *dev) +static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA_TAG_DSA - if (dev->dsa_ptr != NULL) - return dsa_uses_dsa_tags(dev->dsa_ptr); -#endif - - return 0; -} - -static inline bool netdev_uses_trailer_tags(struct net_device *dev) -{ -#ifdef CONFIG_NET_DSA_TAG_TRAILER - if (dev->dsa_ptr != NULL) - return dsa_uses_trailer_tags(dev->dsa_ptr); +#ifdef CONFIG_NET_DSA + return dev->dsa_ptr != NULL; +#else + return false; #endif - - return 0; } /** @@ -1933,6 +1922,13 @@ struct udp_offload { struct offload_callbacks callbacks; }; +struct dsa_device_ops { + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); + int (*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +}; + + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/net/dsa.h b/include/net/dsa.h index 6efce384451e..6e26f1e4d8ce 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -59,6 +59,8 @@ struct dsa_platform_data { struct dsa_chip_data *chip; }; +struct dsa_device_ops; + struct dsa_switch_tree { /* * Configuration data for the platform device that owns @@ -71,6 +73,7 @@ struct dsa_switch_tree { * protocol to use. */ struct net_device *master_netdev; + const struct dsa_device_ops *ops; __be16 tag_protocol; /* @@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } -/* - * The original DSA tag format and some other tag formats have no - * ethertype, which means that we need to add a little hack to the - * networking receive path to make sure that received frames get - * the right ->protocol assigned to them when one of those tag - * formats is in use. - */ -static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_DSA)); -} - -static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); -} - #endif diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0f8210b8e0bc..aa63ed023c2b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -128,6 +128,7 @@ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ +#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ /* * This is an Ethernet frame header. 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0a49632fac47..92e71d2a2ccd 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -608,6 +608,24 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + + if (unlikely(dst == NULL)) { + kfree_skb(skb); + return 0; + } + + return dst->ops->rcv(skb, dev, pt, orig_dev); +} + +struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, +}; + static const struct of_device_id dsa_of_match_table[] = { { .compatible = "marvell,dsa", }, {} @@ -633,30 +651,15 @@ static int __init dsa_init_module(void) if (rc) return rc; -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_add_pack(&dsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_add_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_add_pack(&trailer_packet_type); -#endif + dev_add_pack(&dsa_pack_type); + return 0; } module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_remove_pack(&trailer_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_remove_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_remove_pack(&dsa_packet_type); -#endif + dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d4cf5cc747e3..218d75d16f6f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -45,16 +45,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, int port, char *name); /* tag_dsa.c */ -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type dsa_packet_type; +extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type edsa_packet_type; +extern const struct dsa_device_ops edsa_netdev_ops; /* tag_trailer.c */ -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type trailer_packet_type; +extern const struct dsa_device_ops trailer_netdev_ops; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 45a1e34c89e0..ad1a913533aa 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -171,6 +171,14 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch_tree *dst = p->parent->dst; + + return dst->ops->xmit(skb, dev); +} + /* ethtool operations *******************************************************/ static int @@ -293,42 +301,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_sset_count = dsa_slave_get_sset_count, }; -#ifdef CONFIG_NET_DSA_TAG_DSA -static const struct net_device_ops dsa_netdev_ops = { +static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_xmit, + .ndo_start_xmit = dsa_slave_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA -static const struct net_device_ops 
edsa_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = edsa_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER -static const struct net_device_ops trailer_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = trailer_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif /* slave device setup *******************************************************/ struct net_device * @@ -349,21 +331,22 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; + slave_dev->netdev_ops = &dsa_slave_netdev_ops; switch (ds->dst->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): - slave_dev->netdev_ops = &dsa_netdev_ops; + ds->dst->ops = &dsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): - slave_dev->netdev_ops = &edsa_netdev_ops; + ds->dst->ops = &edsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER case htons(ETH_P_TRAILER): - slave_dev->netdev_ops = &trailer_netdev_ops; + ds->dst->ops = &trailer_netdev_ops; break; #endif default: diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index cacce1e22f9c..d7dbc5bda5c0 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -16,7 +16,7 @@ #define DSA_HLEN 4 -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *dsa_header; @@ -186,7 +186,7 @@ out: return 0; } -struct packet_type dsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DSA), - .func = dsa_rcv, +const struct dsa_device_ops dsa_netdev_ops = { + .xmit = dsa_xmit, + .rcv = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e70c43c25e64..6b30abe89183 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -17,7 +17,7 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; @@ -205,7 +205,7 @@ out: return 0; } -struct packet_type edsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_EDSA), - .func = edsa_rcv, +const struct dsa_device_ops edsa_netdev_ops = { + .xmit = edsa_xmit, + .rcv = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 94bc260d015d..5fe9444842c5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ #include #include "dsa_priv.h" -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct sk_buff *nskb; @@ -114,7 +114,7 @@ out: return 0; } -struct packet_type trailer_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TRAILER), - .func = trailer_rcv, +const struct dsa_device_ops trailer_netdev_ops = { + .xmit = trailer_xmit, + .rcv = trailer_rcv, }; diff --git 
a/net/ethernet/eth.c b/net/ethernet/eth.c index f405e0592407..5cebca134585 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -181,11 +181,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ - if (unlikely(netdev_uses_dsa_tags(dev))) - return htons(ETH_P_DSA); - - if (unlikely(netdev_uses_trailer_tags(dev))) - return htons(ETH_P_TRAILER); + if (unlikely(netdev_uses_dsa(dev))) + return htons(ETH_P_XDSA); if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; -- cgit v1.2.3 From 464c3668f065baeacfffa9d421959d21069389fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:48 -0700 Subject: net: phy: provide stub for fixed_phy_set_link_update In preparation for updating the DSA code and to avoid using ifdefs there, provide an empty stub for fixed_phy_set_link_update when CONFIG_FIXED_PHY is not selected. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index ae612acebb53..941138664c1d 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -18,6 +18,9 @@ extern int fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); extern void fixed_phy_del(int phy_addr); +extern int fixed_phy_set_link_update(struct phy_device *phydev, + int (*link_update)(struct net_device *, + struct fixed_phy_status *)); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -34,14 +37,12 @@ static inline int fixed_phy_del(int phy_addr) { return -ENODEV; } -#endif /* CONFIG_FIXED_PHY */ - -/* - * This function issued only by fixed_phy-aware drivers, no need - * protect it with #ifdef - */ -extern int fixed_phy_set_link_update(struct phy_device *phydev, +static inline int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, - struct fixed_phy_status *)); + struct fixed_phy_status *)) +{ + return -ENODEV; +} +#endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 5aed85cec29882d1c4b4b2a01cb75a99efdbe4ed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:52 -0700 Subject: net: dsa: allow switches to work without tagging In case switch port tagging is disabled (voluntarily, or the switch just does not support it), allow us to continue using the defined set of dsa_device_ops in net/dsa/slave.c. We introduce dsa_uses_tagged_protocol() to check whether we need to override skb->protocol and go through the DSA-specific packet_type function, or if we just go on and receive the SKB through the normal path. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 6 +++--- include/net/dsa.h | 5 +++++ net/dsa/slave.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1875dc71422a..429801370d0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,10 +1784,10 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { #ifdef CONFIG_NET_DSA - return dev->dsa_ptr != NULL; -#else - return false; + if (dev->dsa_ptr != NULL) + return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif + return false; } /** diff --git a/include/net/dsa.h b/include/net/dsa.h index dc357454ae3b..1035f6452d79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -198,4 +198,9 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } +static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) +{ + return dst->tag_protocol != 0; +} + #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 03d2894a0f8a..241c2a1684cb 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -181,6 +181,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) return dst->ops->xmit(skb, dev); } +static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + skb->dev = p->parent->dst->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + /* ethtool operations *******************************************************/ static int @@ -314,6 +325,11 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_do_ioctl = dsa_slave_ioctl, }; +static const struct dsa_device_ops notag_netdev_ops = { + .xmit = dsa_slave_notag_xmit, + .rcv = NULL, +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -415,7 +431,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, break; #endif default: - BUG(); + ds->dst->ops = &notag_netdev_ops; + break; } SET_NETDEV_DEV(slave_dev, parent); -- cgit v1.2.3 From 97fdaab4699de3a2a91001efef60bb0622de1c53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:25 -0700 Subject: net: phy: broadcom: fix PHY_BCM_OUI_4 PHY_BCM_OUI_4 is missing two significant digits that actually make it an OUI; add those missing digits so it becomes usable again for matching. Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ee1431d976fa..921f17ca4c26 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -21,7 +21,7 @@ #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 #define PHY_BCM_OUI_3 0x03625c00 -#define PHY_BCM_OUI_4 0x600d0000 +#define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 -- cgit v1.2.3 From 11bf2bbd596add62a86a74fc7aedc0b86c6ec154 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:26 -0700 Subject: net: phy: broadcom: add new Broadcom OUI Broadcom started to use a new OUI for its 2013 and newer products: D4-01-29, which translates into 0xae025000 as a 32-bit OUI; add its definition. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 921f17ca4c26..cbcfad36d4c0 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -23,7 +23,7 @@ #define PHY_BCM_OUI_3 0x03625c00 #define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 - +#define PHY_BCM_OUI_6 0xae025000 #define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 #define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 -- cgit v1.2.3 From 430ad68ffb5fa632a277162e5995cd6f7a39fb78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:27 -0700 Subject: net: phy: bcm7xxx: add BCM7250 and BCM7364 PHY entries Add two new entries to the Broadcom BCM7xxx internal PHY driver for BCM7250 and BCM7364 chips. Those chips share the usual 28nm process Gigabit PHY sequence and require the same workarounds so far. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 4 ++++ include/linux/brcmphy.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 948c7086679a..09dd6e1dc6e1 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -335,6 +335,8 @@ static int bcm7xxx_dummy_config_init(struct phy_device *phydev) } static struct phy_driver bcm7xxx_driver[] = { + BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), + BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), @@ -367,6 +369,8 @@ static struct phy_driver bcm7xxx_driver[] = { } }; static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { + { PHY_ID_BCM7250, 0xfffffff0, }, + { PHY_ID_BCM7364, 0xfffffff0, }, { PHY_ID_BCM7366, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index cbcfad36d4c0..5bd35cc0d471 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,8 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM57780 0x03625d90 +#define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7445 0x600d8510 -- cgit v1.2.3 From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is particularly important when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr().
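For illustration, a minimal sketch (example_mask and example_use are invented names, not part of the patch) of how the new macro is meant to be used with a per-cpu cpumask_var_t under either configuration:

	/*
	 * With CONFIG_CPUMASK_OFFSTACK a per-cpu cpumask_var_t is a per-cpu
	 * pointer that must be fetched with this_cpu_read(); without it, it
	 * is a per-cpu struct whose address is taken with this_cpu_ptr().
	 * this_cpu_cpumask_var_ptr() picks the right operation either way.
	 */
	static DEFINE_PER_CPU(cpumask_var_t, example_mask);

	static void example_use(void)	/* called with preemption disabled */
	{
		struct cpumask *mask = this_cpu_cpumask_var_ptr(example_mask);

		cpumask_clear(mask);
		cpumask_set_cpu(smp_processor_id(), mask);
	}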
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differs between configurations, which influences which + * this_cpu_* operation needs to be used. Please use + * this_cpu_cpumask_var_ptr() in those cases. The direct use of + * this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured.
*/ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); -- cgit v1.2.3 From d7cdb968081727746c8d2fb31b12ea6d1694888e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Jun 2014 17:19:06 +0200 Subject: treewide: fix synchronize_rcu() in comments Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Jiri Kosina --- include/linux/percpu-refcount.h | 2 +- net/rds/af_rds.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..652fd64cab5e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -29,7 +29,7 @@ * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it - * calls percpu_ref_kill(), then hlist_del_rcu() and sychronize_rcu() to remove + * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove * the kioctx from the proccess's list of kioctxs - after that, there can't be * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop * the initial ref with percpu_ref_put(). 
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 424ff622ab5f..10443377fb9d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -83,7 +83,7 @@ static int rds_release(struct socket *sock) /* * the binding lookup hash uses rcu, we need to - * make sure we sychronize_rcu before we free our + * make sure we synchronize_rcu before we free our * entry */ rds_remove_bound(rs); -- cgit v1.2.3 From d37aba521379203b740a2929e6e6f6bd2485f5d7 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:54:18 +0200 Subject: ARM: tegra: remove unused tegra_emc.h The header file include/linux/platform_data/tegra_emc.h does not seem to be used anywhere. It was orphaned by a7cbe92c "ARM: tegra: remove tegra EMC scaling driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: Stephen Warren --- include/linux/platform_data/tegra_emc.h | 34 --------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 include/linux/platform_data/tegra_emc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tegra_emc.h b/include/linux/platform_data/tegra_emc.h deleted file mode 100644 index df67505e98f8..000000000000 --- a/include/linux/platform_data/tegra_emc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2011 Google, Inc. - * - * Author: - * Colin Cross - * Olof Johansson - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __TEGRA_EMC_H_ -#define __TEGRA_EMC_H_ - -#define TEGRA_EMC_NUM_REGS 46 - -struct tegra_emc_table { - unsigned long rate; - u32 regs[TEGRA_EMC_NUM_REGS]; -}; - -struct tegra_emc_pdata { - int num_tables; - struct tegra_emc_table *tables; -}; - -#endif -- cgit v1.2.3 From 2b8941b962a9f24d61c2b3c2e889928e6cf3d82b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 27 Aug 2014 11:17:56 -0400 Subject: NFSD: Update some as-yet unused 4.2 error codes Recent NFS v4.2 drafts have removed NFS4ERR_METADATA_NOTSUPP and reassigned the error code to NFS4ERR_UNION_NOTSUPP. I also add in the NFS4ERR_OFFLOAD_NO_REQS error code. We're not using any of these yet, so there's no harm done. Signed-off-by: Anna Schumaker Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsd.h | 2 +- include/linux/nfs4.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847daf37e566..747f3b95bd11 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) -#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a1e3064a8d99..79b2a0f5c318 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -235,10 +235,11 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_METADATA_NOTSUPP = 10090, + NFS4ERR_UNION_NOTSUPP = 10090, NFS4ERR_OFFLOAD_DENIED = 10091, NFS4ERR_WRONG_LFS = 10092, NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, }; static inline bool seqid_mutating_err(u32 err) -- cgit v1.2.3 From abdc08a3a263a20e49534a36291d657bf53dda5b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 19 Aug 2014 10:06:09 -0700 Subject: gpio: change gpiochip_request_own_desc() prototype The current prototype of gpiochip_request_own_desc() requires the caller to obtain a pointer to a descriptor. This is in contradiction to all other GPIO request schemes, and imposes an extra step of obtaining a descriptor on drivers. Most drivers actually cannot even perform that step since the function that does it (gpiochip_get_desc()) is gpiolib-private. Change gpiochip_request_own_desc() to return a descriptor from a (chip, hwnum) tuple and update users of this function (currently gpiolib-acpi only). Signed-off-by: Alexandre Courbot Tested-by: Mika Westerberg Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 3 ++- drivers/gpio/gpiolib-acpi.c | 20 +++----------------- drivers/gpio/gpiolib.c | 18 ++++++++++++++---- include/linux/gpio/driver.h | 3 ++- 4 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index 18790c237977..23b751a10d7b 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -178,7 +178,8 @@ does not help since it pins the module to the kernel forever (it calls try_module_get()). A GPIO driver can use the following functions instead to request and free descriptors without being pinned to the kernel forever.
- int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) + struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc, + const char *label) void gpiochip_free_own_desc(struct gpio_desc *desc) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 84540025aa08..f9103e72e2a4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -145,14 +145,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (!handler) return AE_BAD_PARAMETER; - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event"); if (IS_ERR(desc)) { - dev_err(chip->dev, "Failed to get GPIO descriptor\n"); - return AE_ERROR; - } - - ret = gpiochip_request_own_desc(desc, "ACPI:Event"); - if (ret) { dev_err(chip->dev, "Failed to request GPIO\n"); return AE_ERROR; } @@ -420,22 +414,14 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, } } if (!found) { - int ret; - - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, + "ACPI:OpRegion"); if (IS_ERR(desc)) { status = AE_ERROR; mutex_unlock(&achip->conn_lock); goto out; } - ret = gpiochip_request_own_desc(desc, "ACPI:OpRegion"); - if (ret) { - status = AE_ERROR; - mutex_unlock(&achip->conn_lock); - goto out; - } - switch (agpio->io_restriction) { case ACPI_IO_RESTRICT_INPUT: gpiod_direction_input(desc); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 15cc0bb65dda..a5831d6a9b91 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -895,12 +895,22 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested); * allows the GPIO chip module to be unloaded as needed (we assume that the * GPIO chip driver handles freeing the GPIOs it has requested). */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label) { - if (!desc || !desc->chip) - return -EINVAL; + struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum); + int err; + + if (IS_ERR(desc)) { + chip_err(chip, "failed to get GPIO descriptor\n"); + return desc; + } + + err = __gpiod_request(desc, label); + if (err < 0) + return ERR_PTR(err); - return __gpiod_request(desc, label); + return desc; } EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e78a2373e374..a2de58fffd19 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -166,7 +166,8 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIO_IRQCHIP */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label); void gpiochip_free_own_desc(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 7179569aeb52197fd2a9909ba226c4c9cc0e2e2a Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Thu, 28 Aug 2014 12:36:04 -0700 Subject: regulator: core: Add REGULATOR_EVENT_PRE_VOLTAGE_CHANGE (and ABORT) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases we need to know when a regulator is about to be changed. Add a way for clients to be notified. Note that for set_voltage() we don't necessarily know what voltage we'll end up with, so we tell the client what the range will be so they can prepare. 
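For illustration, a minimal sketch (example_reg_event and example_nb are invented names, not part of the patch) of a consumer subscribing to the new events with a regulator already obtained from regulator_get():

	static int example_reg_event(struct notifier_block *nb,
				     unsigned long event, void *data)
	{
		struct pre_voltage_change_data *vdata;

		switch (event) {
		case REGULATOR_EVENT_PRE_VOLTAGE_CHANGE:
			vdata = data;
			/* prepare for any voltage in [vdata->min_uV, vdata->max_uV] */
			break;
		case REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE:
			/* the change failed; data is the old voltage cast to (void *) */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_reg_event,
	};

	/* after regulator_get(): */
	ret = regulator_register_notifier(reg, &example_nb);

A handler that returns a value with NOTIFY_STOP_MASK set from PRE_VOLTAGE_CHANGE makes the core abort the change with -EINVAL, so a client can veto a voltage it cannot tolerate.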
Signed-off-by: Heiko Stübner Signed-off-by: Doug Anderson Signed-off-by: Mark Brown --- drivers/regulator/core.c | 63 +++++++++++++++++++++++++++++++++----- include/linux/regulator/consumer.h | 20 ++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..dabc8e8862c8 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -102,7 +102,7 @@ static int _regulator_disable(struct regulator_dev *rdev); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); @@ -2369,6 +2369,55 @@ int regulator_is_supported_voltage(struct regulator *regulator, } EXPORT_SYMBOL_GPL(regulator_is_supported_voltage); +static int _regulator_call_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV, + unsigned *selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = min_uV; + data.max_uV = max_uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + +static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, + int uV, unsigned selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = uV; + data.max_uV = uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage_sel(rdev, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { @@ -2396,8 +2445,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, } if (rdev->desc->ops->set_voltage) { - ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, - &selector); + ret = _regulator_call_set_voltage(rdev, min_uV, max_uV, + &selector); if (ret >= 0) { if (rdev->desc->ops->list_voltage) @@ -2432,8 +2481,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, if (old_selector == selector) ret = 0; else - ret = rdev->desc->ops->set_voltage_sel( - rdev, ret); + ret = _regulator_call_set_voltage_sel( + rdev, best_val, selector); } else { ret = -EINVAL; } @@ -3079,11 +3128,11 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. * Note mutex must be held by caller. 
*/ -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ - blocking_notifier_call_chain(&rdev->notifier, event, data); + return blocking_notifier_call_chain(&rdev->notifier, event, data); } /** diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index f8a8733068a7..d347c805f923 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -93,7 +93,12 @@ struct regmap; * OVER_TEMP Regulator over temp. * FORCE_DISABLE Regulator forcibly shut down by software. * VOLTAGE_CHANGE Regulator voltage changed. + * Data passed is old voltage cast to (void *). * DISABLE Regulator was disabled. + * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed. + * Data passed is "struct pre_voltage_change_data" + * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason. + * Data passed is old voltage cast to (void *). * * NOTE: These events can be OR'ed together when passed into handler. */ @@ -106,6 +111,21 @@ struct regmap; #define REGULATOR_EVENT_FORCE_DISABLE 0x20 #define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 #define REGULATOR_EVENT_DISABLE 0x80 +#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 +#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 + +/** + * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event + * + * @old_uV: Current voltage before change. + * @min_uV: Min voltage we'll change to. + * @max_uV: Max voltage we'll change to. + */ +struct pre_voltage_change_data { + unsigned long old_uV; + unsigned long min_uV; + unsigned long max_uV; +}; struct regulator; -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. 
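For illustration, a minimal sketch (example_arch_vcpu_op is an invented name, not from the patch) of why the forward declarations matter to an arch header:

	/* hypothetical asm/kvm_host.h fragment */
	#include <linux/kvm_types.h>	/* forward-declares struct kvm_vcpu etc. */

	/*
	 * Without a prior declaration, this prototype would warn
	 * "'struct kvm_vcpu' declared inside parameter list" and the struct
	 * type would be scoped to the prototype alone, conflicting with the
	 * real definition encountered later.
	 */
	int example_arch_vcpu_op(struct kvm_vcpu *vcpu);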
Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 5 +++-- arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..4843397b812c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..766147baae0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..fccc09d04649 100644 --- 
a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..b93bc80ed7e7 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..0e597283c5c6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..d71291d3fb6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include @@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ac0f90e26a0b..567fface45f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct 
kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + +#include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beginning was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 16 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index aea259e9431f..032a8538318a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 132bb0d9c5ad..005a7b5fd0aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b5045e3e05f8..be9970a59497 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5e14dcaf844e..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 0b24d6622ec1..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0ec7490d70bd..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237cc0cc80a2..604000882352 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,7 +682,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c3fc085207..da505237a664 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f6dd90684b97..a76a124dff48 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b8fe1ae777db..628e992eeded 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 567fface45f8..73e4149eda33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,8 +661,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int 
(*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7cd230e55118..8ef704037370 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d70550d0bcff..671ca5edc709 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c10408ef9ab1..a375dfc42f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1d03967def40..7176929a4cda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) 
cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) -- cgit v1.2.3 From 10c51b56232d24f150e39884a9e749fd99cbc60c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 27 Aug 2014 11:11:27 +0200 Subject: net: add skb_get_tx_queue() helper Replace occurrences of skb_get_queue_mapping() and follow-up netdev_get_tx_queue() with an actual helper function. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ net/core/netpoll.c | 2 +- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 4 +--- net/sched/sch_generic.c | 6 ++++-- 5 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 429801370d0c..dfc1d8b8bd0f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1747,6 +1747,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, return &dev->_tx[index]; } +static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, + const struct sk_buff *skb) +{ + return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); +} + static inline void netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a5ad06828d67..12b1df976562 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -115,7 +115,7 @@ static void queue_process(struct work_struct *work) continue; } - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); local_irq_save(flags); HARD_TX_LOCK(dev, txq, smp_processor_id()); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 83e2b4b19eb7..d81b540096c3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3286,7 +3286,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; - u16 queue_map; int ret; /* If device is offline, then don't send */ @@ -3324,8 +3323,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); - queue_map = skb_get_queue_mapping(pkt_dev->skb); - txq = netdev_get_tx_queue(odev, queue_map); + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0dfa990d4eaa..b7a7f5a721bd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -243,7 +243,6 @@ static int packet_direct_xmit(struct sk_buff *skb) netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - u16 queue_map; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) @@ -254,8 +253,7 @@ static int packet_direct_xmit(struct sk_buff *skb) __skb_linearize(skb)) goto drop; - queue_map = skb_get_queue_mapping(skb); - txq = netdev_get_tx_queue(dev, queue_map); + txq = skb_get_tx_queue(dev, skb); local_bh_disable(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fc04fe93c2da..05b3f5d104af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,7 +63,7 @@ static inline struct sk_buff
*dequeue_skb(struct Qdisc *q) if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ - txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; @@ -183,10 +183,12 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; + WARN_ON_ONCE(skb_dst_is_noref(skb)); + root_lock = qdisc_lock(q); dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock); } -- cgit v1.2.3 From 18fe8db5f2b53e4ac67b47048f24f50c57a2a759 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:31 +0200 Subject: include/linux/cycx_x25.h: Remove unused header The header file include/linux/cycx_x25.h does not seem to be used anywhere. It was orphaned by 6fcdf4facb "wanrouter: delete now orphaned header content, files/drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/cycx_x25.h | 125 ----------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 include/linux/cycx_x25.h (limited to 'include/linux') diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h deleted file mode 100644 index 362bf19d6cf1..000000000000 --- a/include/linux/cycx_x25.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef _CYCX_X25_H -#define _CYCX_X25_H -/* -* cycx_x25.h Cyclom X.25 firmware API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.h by Gene Kozin <74604.152@compuserve.com> -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/04/02 acme dprintk and cycx_debug -* 1999/01/03 acme judicious use of data types -* 1999/01/02 acme #define X25_ACK_N3 0x4411 -* 1998/12/28 acme cleanup: lot'o'things removed -* commands listed, -* TX25Cmd & TX25Config structs -* typedef'ed -*/ -#ifndef PACKED -#define PACKED __attribute__((packed)) -#endif - -/* X.25 shared memory layout. */ -#define X25_MBOX_OFFS 0x300 /* general mailbox block */ -#define X25_RXMBOX_OFFS 0x340 /* receive mailbox */ - -/* Debug */ -#define dprintk(level, format, a...) if (cycx_debug >= level) printk(format, ##a) - -extern unsigned int cycx_debug; - -/* Data Structures */ -/* X.25 Command Block. */ -struct cycx_x25_cmd { - u16 command; - u16 link; /* values: 0 or 1 */ - u16 len; /* values: 0 thru 0x205 (517) */ - u32 buf; -} PACKED; - -/* Defines for the 'command' field. 
*/ -#define X25_CONNECT_REQUEST 0x4401 -#define X25_CONNECT_RESPONSE 0x4402 -#define X25_DISCONNECT_REQUEST 0x4403 -#define X25_DISCONNECT_RESPONSE 0x4404 -#define X25_DATA_REQUEST 0x4405 -#define X25_ACK_TO_VC 0x4406 -#define X25_INTERRUPT_RESPONSE 0x4407 -#define X25_CONFIG 0x4408 -#define X25_CONNECT_INDICATION 0x4409 -#define X25_CONNECT_CONFIRM 0x440A -#define X25_DISCONNECT_INDICATION 0x440B -#define X25_DISCONNECT_CONFIRM 0x440C -#define X25_DATA_INDICATION 0x440E -#define X25_INTERRUPT_INDICATION 0x440F -#define X25_ACK_FROM_VC 0x4410 -#define X25_ACK_N3 0x4411 -#define X25_CONNECT_COLLISION 0x4413 -#define X25_N3WIN 0x4414 -#define X25_LINE_ON 0x4415 -#define X25_LINE_OFF 0x4416 -#define X25_RESET_REQUEST 0x4417 -#define X25_LOG 0x4500 -#define X25_STATISTIC 0x4600 -#define X25_TRACE 0x4700 -#define X25_N2TRACEXC 0x4702 -#define X25_N3TRACEXC 0x4703 - -/** - * struct cycx_x25_config - cyclom2x x25 firmware configuration - * @link - link number - * @speed - line speed - * @clock - internal/external - * @n2 - # of level 2 retransm.(values: 1 thru FF) - * @n2win - level 2 window (values: 1 thru 7) - * @n3win - level 3 window (values: 1 thru 7) - * @nvc - # of logical channels (values: 1 thru 64) - * @pktlen - level 3 packet length - log base 2 of size - * @locaddr - my address - * @remaddr - remote address - * @t1 - time, in seconds - * @t2 - time, in seconds - * @t21 - time, in seconds - * @npvc - # of permanent virt. circuits (1 thru nvc) - * @t23 - time, in seconds - * @flags - see dosx25.doc, in portuguese, for details - */ -struct cycx_x25_config { - u8 link; - u8 speed; - u8 clock; - u8 n2; - u8 n2win; - u8 n3win; - u8 nvc; - u8 pktlen; - u8 locaddr; - u8 remaddr; - u16 t1; - u16 t2; - u8 t21; - u8 npvc; - u8 t23; - u8 flags; -} PACKED; - -struct cycx_x25_stats { - u16 rx_crc_errors; - u16 rx_over_errors; - u16 n2_tx_frames; - u16 n2_rx_frames; - u16 tx_timeouts; - u16 rx_timeouts; - u16 n3_tx_packets; - u16 n3_rx_packets; - u16 tx_aborts; - u16 rx_aborts; -} PACKED; -#endif /* _CYCX_X25_H */ -- cgit v1.2.3 From fbd74659d4513816a6249b0db491e8d831803520 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:32 +0200 Subject: include/linux/i82593.h: Remove unused header The header file include/linux/i82593.h does not seem to be used anywhere. It was orphaned by 8a594170 "drivers/net: delete intel i825xx based znet notebook driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/i82593.h | 229 ------------------------------------------------- 1 file changed, 229 deletions(-) delete mode 100644 include/linux/i82593.h (limited to 'include/linux') diff --git a/include/linux/i82593.h b/include/linux/i82593.h deleted file mode 100644 index afac5c7a323d..000000000000 --- a/include/linux/i82593.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Definitions for Intel 82593 CSMA/CD Core LAN Controller - * The definitions are taken from the 1992 users manual with Intel - * order number 297125-001. - * - * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp - * - * Copyright 1994, Anders Klemets - * - * HISTORY - * i82593.h,v - * Revision 1.4 2005/11/4 09:15:00 baroniunas - * Modified copyright with permission of author as follows: - * - * "If I82539.H is the only file with my copyright statement - * that is included in the Source Forge project, then you have - * my approval to change the copyright statement to be a GPL - * license, in the way you proposed on October 10." 
- * - * Revision 1.1 1996/07/17 15:23:12 root - * Initial revision - * - * Revision 1.3 1995/04/05 15:13:58 adj - * Initial alpha release - * - * Revision 1.2 1994/06/16 23:57:31 klemets - * Mirrored all the fields in the configuration block. - * - * Revision 1.1 1994/06/02 20:25:34 klemets - * Initial revision - * - * - */ -#ifndef _I82593_H -#define _I82593_H - -/* Intel 82593 CSMA/CD Core LAN Controller */ - -/* Port 0 Command Register definitions */ - -/* Execution operations */ -#define OP0_NOP 0 /* CHNL = 0 */ -#define OP0_SWIT_TO_PORT_1 0 /* CHNL = 1 */ -#define OP0_IA_SETUP 1 -#define OP0_CONFIGURE 2 -#define OP0_MC_SETUP 3 -#define OP0_TRANSMIT 4 -#define OP0_TDR 5 -#define OP0_DUMP 6 -#define OP0_DIAGNOSE 7 -#define OP0_TRANSMIT_NO_CRC 9 -#define OP0_RETRANSMIT 12 -#define OP0_ABORT 13 -/* Reception operations */ -#define OP0_RCV_ENABLE 8 -#define OP0_RCV_DISABLE 10 -#define OP0_STOP_RCV 11 -/* Status pointer control operations */ -#define OP0_FIX_PTR 15 /* CHNL = 1 */ -#define OP0_RLS_PTR 15 /* CHNL = 0 */ -#define OP0_RESET 14 - -#define CR0_CHNL (1 << 4) /* 0=Channel 0, 1=Channel 1 */ -#define CR0_STATUS_0 0x00 -#define CR0_STATUS_1 0x20 -#define CR0_STATUS_2 0x40 -#define CR0_STATUS_3 0x60 -#define CR0_INT_ACK (1 << 7) /* 0=No ack, 1=acknowledge */ - -/* Port 0 Status Register definitions */ - -#define SR0_NO_RESULT 0 /* dummy */ -#define SR0_EVENT_MASK 0x0f -#define SR0_IA_SETUP_DONE 1 -#define SR0_CONFIGURE_DONE 2 -#define SR0_MC_SETUP_DONE 3 -#define SR0_TRANSMIT_DONE 4 -#define SR0_TDR_DONE 5 -#define SR0_DUMP_DONE 6 -#define SR0_DIAGNOSE_PASSED 7 -#define SR0_TRANSMIT_NO_CRC_DONE 9 -#define SR0_RETRANSMIT_DONE 12 -#define SR0_EXECUTION_ABORTED 13 -#define SR0_END_OF_FRAME 8 -#define SR0_RECEPTION_ABORTED 10 -#define SR0_DIAGNOSE_FAILED 15 -#define SR0_STOP_REG_HIT 11 - -#define SR0_CHNL (1 << 4) -#define SR0_EXECUTION (1 << 5) -#define SR0_RECEPTION (1 << 6) -#define SR0_INTERRUPT (1 << 7) -#define SR0_BOTH_RX_TX (SR0_EXECUTION | SR0_RECEPTION) - -#define SR3_EXEC_STATE_MASK 0x03 -#define SR3_EXEC_IDLE 0 -#define SR3_TX_ABORT_IN_PROGRESS 1 -#define SR3_EXEC_ACTIVE 2 -#define SR3_ABORT_IN_PROGRESS 3 -#define SR3_EXEC_CHNL (1 << 2) -#define SR3_STP_ON_NO_RSRC (1 << 3) -#define SR3_RCVING_NO_RSRC (1 << 4) -#define SR3_RCV_STATE_MASK 0x60 -#define SR3_RCV_IDLE 0x00 -#define SR3_RCV_READY 0x20 -#define SR3_RCV_ACTIVE 0x40 -#define SR3_RCV_STOP_IN_PROG 0x60 -#define SR3_RCV_CHNL (1 << 7) - -/* Port 1 Command Register definitions */ - -#define OP1_NOP 0 -#define OP1_SWIT_TO_PORT_0 1 -#define OP1_INT_DISABLE 2 -#define OP1_INT_ENABLE 3 -#define OP1_SET_TS 5 -#define OP1_RST_TS 7 -#define OP1_POWER_DOWN 8 -#define OP1_RESET_RING_MNGMT 11 -#define OP1_RESET 14 -#define OP1_SEL_RST 15 - -#define CR1_STATUS_4 0x00 -#define CR1_STATUS_5 0x20 -#define CR1_STATUS_6 0x40 -#define CR1_STOP_REG_UPDATE (1 << 7) - -/* Receive frame status bits */ - -#define RX_RCLD (1 << 0) -#define RX_IA_MATCH (1 << 1) -#define RX_NO_AD_MATCH (1 << 2) -#define RX_NO_SFD (1 << 3) -#define RX_SRT_FRM (1 << 7) -#define RX_OVRRUN (1 << 8) -#define RX_ALG_ERR (1 << 10) -#define RX_CRC_ERR (1 << 11) -#define RX_LEN_ERR (1 << 12) -#define RX_RCV_OK (1 << 13) -#define RX_TYP_LEN (1 << 15) - -/* Transmit status bits */ - -#define TX_NCOL_MASK 0x0f -#define TX_FRTL (1 << 4) -#define TX_MAX_COL (1 << 5) -#define TX_HRT_BEAT (1 << 6) -#define TX_DEFER (1 << 7) -#define TX_UND_RUN (1 << 8) -#define TX_LOST_CTS (1 << 9) -#define TX_LOST_CRS (1 << 10) -#define TX_LTCOL (1 << 11) -#define TX_OK (1 << 13) -#define TX_COLL 
(1 << 15) - -struct i82593_conf_block { - u_char fifo_limit : 4, - forgnesi : 1, - fifo_32 : 1, - d6mod : 1, - throttle_enb : 1; - u_char throttle : 6, - cntrxint : 1, - contin : 1; - u_char addr_len : 3, - acloc : 1, - preamb_len : 2, - loopback : 2; - u_char lin_prio : 3, - tbofstop : 1, - exp_prio : 3, - bof_met : 1; - u_char : 4, - ifrm_spc : 4; - u_char : 5, - slottim_low : 3; - u_char slottim_hi : 3, - : 1, - max_retr : 4; - u_char prmisc : 1, - bc_dis : 1, - : 1, - crs_1 : 1, - nocrc_ins : 1, - crc_1632 : 1, - : 1, - crs_cdt : 1; - u_char cs_filter : 3, - crs_src : 1, - cd_filter : 3, - : 1; - u_char : 2, - min_fr_len : 6; - u_char lng_typ : 1, - lng_fld : 1, - rxcrc_xf : 1, - artx : 1, - sarec : 1, - tx_jabber : 1, /* why is this called max_len in the manual? */ - hash_1 : 1, - lbpkpol : 1; - u_char : 6, - fdx : 1, - : 1; - u_char dummy_6 : 6, /* supposed to be ones */ - mult_ia : 1, - dis_bof : 1; - u_char dummy_1 : 1, /* supposed to be one */ - tx_ifs_retrig : 2, - mc_all : 1, - rcv_mon : 2, - frag_acpt : 1, - tstrttrs : 1; - u_char fretx : 1, - runt_eop : 1, - hw_sw_pin : 1, - big_endn : 1, - syncrqs : 1, - sttlen : 1, - tx_eop : 1, - rx_eop : 1; - u_char rbuf_size : 5, - rcvstop : 1, - : 2; -}; - -#define I82593_MAX_MULTICAST_ADDRESSES 128 /* Hardware hashed filter */ - -#endif /* _I82593_H */ -- cgit v1.2.3 From 6fb7c3778f0fba0bad099c30e834c413c4f8bcb5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:33 +0200 Subject: include/linux/phonedev.h: Remove unused header The header file include/linux/phonedev.h does not seem to be used anywhere. It was orphaned by 7326446c "Staging: remove telephony drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/phonedev.h | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 include/linux/phonedev.h (limited to 'include/linux') diff --git a/include/linux/phonedev.h b/include/linux/phonedev.h deleted file mode 100644 index 4269de99e320..000000000000 --- a/include/linux/phonedev.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __LINUX_PHONEDEV_H -#define __LINUX_PHONEDEV_H - -#include - -#ifdef __KERNEL__ - -#include - -struct phone_device { - struct phone_device *next; - const struct file_operations *f_op; - int (*open) (struct phone_device *, struct file *); - int board; /* Device private index */ - int minor; -}; - -extern int phonedev_init(void); -#define PHONE_MAJOR 100 -extern int phone_register_device(struct phone_device *, int unit); -#define PHONE_UNIT_ANY -1 -extern void phone_unregister_device(struct phone_device *); - -#endif -#endif -- cgit v1.2.3 From de20fe8e2cc3c4ca13fdb529e6720d9d199333fe Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:35 -0700 Subject: net: Allocate a new 16 bits for flags in skbuff Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b69b7b512c06..3c9574c80933 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -598,6 +598,10 @@ struct sk_buff { __u32 reserved_tailroom; }; + kmemcheck_bitfield_begin(flags3); + /* 16 bit hole */ + kmemcheck_bitfield_end(flags3); + __be16 inner_protocol; __u16 inner_transport_header; __u16 inner_network_header; -- cgit v1.2.3 From 77cffe23c1f88835f6bd7b47bfa0c060c2969828 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:46 -0700 Subject: net: Clarification of CHECKSUM_UNNECESSARY This patch: - Clarifies the specific requirements of devices returning CHECKSUM_UNNECESSARY (comments in skbuff.h). - Adds csum_level field to skbuff. This is used to express how many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1). This replaces the overloading of skb->encapsulation, that field is now only used to indicate inner headers are valid. - Change __skb_checksum_validate_needed to "consume" each checksum as indicated by csum_level as layers of the packet are parsed. - Remove skb_pop_rcv_encapsulation, no longer needed in the new csum_level model. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 -- include/linux/skbuff.h | 74 ++++++++++++++++++++++++++++++++++---------------- net/ipv4/gre_demux.c | 1 - 3 files changed, 51 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index beb377b2d4b7..67527f3d3be2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - skb_pop_rcv_encapsulation(skb); - vs->rcv(vs, skb, vxh->vx_vni); return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3c9574c80933..c93b5859a772 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -47,11 +47,29 @@ * * The hardware you're dealing with doesn't calculate the full checksum * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will - * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still - * undefined in this case though. It is a bad option, but, unfortunately, - * nowadays most vendors do this. Apparently with the secret goal to sell - * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum or because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). * * CHECKSUM_COMPLETE: * @@ -112,6 +130,9 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -571,11 +592,7 @@ struct sk_buff { __u8 wifi_acked:1; __u8 no_fcs:1; __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; @@ -599,7 +616,8 @@ struct sk_buff { }; kmemcheck_bitfield_begin(flags3); - /* 16 bit hole */ + __u8 csum_level:2; + /* 14 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -1866,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) return pskb_may_pull(skb, skb_network_offset(skb) + len); } -static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb) -{ - /* Only continue with checksum unnecessary if device indicated - * it is valid across encapsulation (skb->encapsulation was set). - */ - if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation) - skb->ip_summed = CHECKSUM_NONE; - - skb->encapsulation = 0; - skb->csum_valid = 0; -} - /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the @@ -2798,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + /* Check if we need to perform checksum complete validation.
 * * Returns true if checksum complete is needed, false otherwise @@ -2809,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); return false; } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7c1a8ff974dd..0485bf7f8f03 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } - skb_pop_rcv_encapsulation(skb); options++; } -- cgit v1.2.3
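To illustrate the csum_level convention introduced above: a device that verifies both the outer UDP checksum and the inner TCP checksum of a tunneled frame would advertise two checksums as in the sketch below. This is illustrative only, not code from this series; foo_rx_csum() is a hypothetical driver helper.

#include <linux/skbuff.h>

static void foo_rx_csum(struct sk_buff *skb)
{
	/* Hardware verified two checksums (outer UDP, inner TCP),
	 * so csum_level stores n - 1 = 1.
	 */
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = 1;
}

The stack then consumes one level per validated header via __skb_decr_checksum_unnecessary(), falling back to CHECKSUM_NONE once all advertised checksums have been used.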
From 662880f4420340aad4f9a62a349c6c9d4faa1a5d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:56 -0700 Subject: net: Allow GRO to use and set levels of checksum unnecessary Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY and to report new checksums verified for use in fallback to normal path. Change GRO checksum path to track csum_level using a csum_cnt field in NAPI_GRO_CB. On GRO initialization, if ip_summed is CHECKSUM_UNNECESSARY, set NAPI_GRO_CB(skb)->csum_cnt to skb->csum_level + 1. For each checksum verified, decrement NAPI_GRO_CB(skb)->csum_cnt while it is greater than zero. If a checksum is verified and NAPI_GRO_CB(skb)->csum_cnt == 0, we have verified a deeper checksum than originally indicated in the skbuff, so increment csum_level (or initialize to CHECKSUM_UNNECESSARY if ip_summed is CHECKSUM_NONE or CHECKSUM_COMPLETE). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 26 ++++++++++++-------------- net/core/dev.c | 24 ++++++++++++++++-------- net/ipv4/gre_offload.c | 7 ++----- net/ipv4/udp_offload.c | 5 +++-- 4 files changed, 33 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dfc1d8b8bd0f..456eb1fe51e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,8 +1883,8 @@ struct napi_gro_cb { /* GRO checksum is valid */ u8 csum_valid:1; - /* Number encapsulation layers crossed */ - u8 encapsulation; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, __sum16 check) { return (skb->ip_summed != CHECKSUM_PARTIAL && - (skb->ip_summed != CHECKSUM_UNNECESSARY || - (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } @@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, return __skb_gro_checksum_complete(skb); } -/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level - * checksum or an encapsulated one during GRO. This saves work - * if we fallback to normal path with the packet. - */ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (NAPI_GRO_CB(skb)->encapsulation) - skb->encapsulation = 1; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->encapsulation = 0; + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); } } diff --git a/net/core/dev.c b/net/core/dev.c index 26d296c2447c..a6077ef56345 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff gro_list_prepare(napi, skb); - if (skb->ip_summed == CHECKSUM_COMPLETE) { - NAPI_GRO_CB(skb)->csum = skb->csum; - NAPI_GRO_CB(skb)->csum_valid = 1; - } else { - NAPI_GRO_CB(skb)->csum_valid = 0; - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) @@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; - NAPI_GRO_CB(skb)->encapsulation = 0; + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + NAPI_GRO_CB(skb)->csum_cnt = 0; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + NAPI_GRO_CB(skb)->csum_valid = 0; + break; + default: + NAPI_GRO_CB(skb)->csum_cnt = 0; + NAPI_GRO_CB(skb)->csum_valid = 0; + } pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d1bd16937d93..a4d7965fb880 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, } /* Don't bother verifying checksum if we're going to flush anyway. */ - if (greh->flags & GRE_CSUM) { - if (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_simple_validate(skb)) + if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_simple_validate(skb)) goto out_unlock; - NAPI_GRO_CB(skb)->encapsulation++; - } flush = 0; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8ed460e3753c..a6adff98382a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid)) + (skb->ip_summed != CHECKSUM_PARTIAL && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - NAPI_GRO_CB(skb)->encapsulation++; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); -- cgit v1.2.3 From 4e00517945bed110f1b8de580cce97626e9ef0b5 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 31 Aug 2014 21:10:51 +0200 Subject: regulator: max1586: add device-tree support Add device-tree support to max1586. The driver can still be used with the legacy platform data, or the new device-tree way. This work is heavily inspired by the device-tree support of its cousin max8660 driver.
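As an illustration of the legacy path that remains supported, a board file might provide platform data roughly as follows. This is a sketch only: the foo_* names are made up, the constraint values are sample numbers, and the MAX1586_V3 and MAX1586_GAIN_NO_R24 constants are assumed to come from the existing max1586.h header.

#include <linux/kernel.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/max1586.h>

static struct regulator_init_data foo_v3_data = {
	.constraints = {
		.min_uV = 1000000,	/* sample values */
		.max_uV = 1705000,
		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
	},
};

static struct max1586_subdev_data foo_subdevs[] = {
	{
		.id = MAX1586_V3,	/* assumed constant from max1586.h */
		.name = "Output_V3",
		.platform_data = &foo_v3_data,
	},
};

static struct max1586_platform_data foo_pdata = {
	.subdevs = foo_subdevs,
	.num_subdevs = ARRAY_SIZE(foo_subdevs),
	.v3_gain = MAX1586_GAIN_NO_R24,	/* assumed: no external gain resistors */
};

The device-tree path added below derives the same structure from a "regulators" subnode and the "v3-gain" property instead.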
Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown --- drivers/regulator/max1586.c | 81 ++++++++++++++++++++++++++++++++++++++- include/linux/regulator/max1586.h | 2 +- 2 files changed, 80 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c index d23d0577754b..5c04a7159baa 100644 --- a/drivers/regulator/max1586.c +++ b/drivers/regulator/max1586.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define MAX1586_V3_MAX_VSEL 31 #define MAX1586_V6_MAX_VSEL 3 @@ -157,13 +159,87 @@ static struct regulator_desc max1586_reg[] = { }, }; +int of_get_max1586_platform_data(struct device *dev, + struct max1586_platform_data *pdata) +{ + struct max1586_subdev_data *sub; + struct of_regulator_match rmatch[ARRAY_SIZE(max1586_reg)]; + struct device_node *np = dev->of_node; + int i, matched; + + if (of_property_read_u32(np, "v3-gain", + &pdata->v3_gain) < 0) { + dev_err(dev, "%s has no 'v3-gain' property\n", np->full_name); + return -EINVAL; + } + + np = of_get_child_by_name(np, "regulators"); + if (!np) { + dev_err(dev, "missing 'regulators' subnode in DT\n"); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(rmatch); i++) + rmatch[i].name = max1586_reg[i].name; + + matched = of_regulator_match(dev, np, rmatch, ARRAY_SIZE(rmatch)); + of_node_put(np); + /* + * If matched is 0, ie. neither Output_V3 nor Output_V6 have been found, + * return 0, which signals the normal situation where no subregulator is + * available. This is normal because the max1586 doesn't provide any + * readback support, so the subregulators can't report any status + * anyway. If matched < 0, return the error. + */ + if (matched <= 0) + return matched; + + pdata->subdevs = devm_kzalloc(dev, sizeof(struct max1586_subdev_data) * + matched, GFP_KERNEL); + if (!pdata->subdevs) + return -ENOMEM; + + pdata->num_subdevs = matched; + sub = pdata->subdevs; + + for (i = 0; i < matched; i++) { + sub->id = i; + sub->name = rmatch[i].of_node->name; + sub->platform_data = rmatch[i].init_data; + sub++; + } + + return 0; +} + +static const struct of_device_id max1586_of_match[] = { + { .compatible = "maxim,max1586", }, + {}, +}; +MODULE_DEVICE_TABLE(of, max1586_of_match); + static int max1586_pmic_probe(struct i2c_client *client, const struct i2c_device_id *i2c_id) { - struct max1586_platform_data *pdata = dev_get_platdata(&client->dev); + struct max1586_platform_data *pdata, pdata_of; struct regulator_config config = { }; struct max1586_data *max1586; - int i, id; + int i, id, ret; + const struct of_device_id *match; + + pdata = dev_get_platdata(&client->dev); + if (client->dev.of_node && !pdata) { + match = of_match_device(of_match_ptr(max1586_of_match), + &client->dev); + if (!match) { + dev_err(&client->dev, "Error: No device match found\n"); + return -ENODEV; + } + ret = of_get_max1586_platform_data(&client->dev, &pdata_of); + if (ret < 0) + return ret; + pdata = &pdata_of; + } max1586 = devm_kzalloc(&client->dev, sizeof(struct max1586_data), GFP_KERNEL); @@ -229,6 +305,7 @@ static struct i2c_driver max1586_pmic_driver = { .driver = { .name = "max1586", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(max1586_of_match), }, .id_table = max1586_id, }; diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h index de9a7fae20be..cedd0febe882 100644 --- a/include/linux/regulator/max1586.h +++ b/include/linux/regulator/max1586.h @@ -40,7 +40,7 @@ */ struct max1586_subdev_data { int id; - char *name; + const char *name; 
struct regulator_init_data *platform_data; }; -- cgit v1.2.3 From 068765ba7987e73d4381edfe47b70aa121c7155c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Sep 2014 13:47:49 +0200 Subject: PM / sleep: Mechanism for aborting system suspends unconditionally It sometimes may be necessary to abort a system suspend in progress or wake up the system from suspend-to-idle even if the pm_wakeup_event()/pm_stay_awake() mechanism is not enabled. For this purpose, introduce a new global variable pm_abort_suspend and make pm_wakeup_pending() check its value. Also add routines for manipulating that variable. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 16 +++++++++++++++- include/linux/suspend.h | 4 ++++ kernel/power/process.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index eb1bd2ecad8b..c2744b30d5d9 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -24,6 +24,9 @@ */ bool events_check_enabled __read_mostly; +/* If set and the system is suspending, terminate the suspend. */ +static bool pm_abort_suspend __read_mostly; + /* * Combined counters of registered wakeup events and wakeup events in progress. * They need to be modified together atomically, so it's better to use one @@ -719,7 +722,18 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret; + return ret || pm_abort_suspend; +} + +void pm_system_wakeup(void) +{ + pm_abort_suspend = true; + freeze_wake(); +} + +void pm_wakeup_clear(void) +{ + pm_abort_suspend = false; } /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 519064e0c943..06a9910827c2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); +extern void pm_system_wakeup(void); +extern void pm_wakeup_clear(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); @@ -418,6 +420,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } +static inline void pm_system_wakeup(void) {} +static inline void pm_wakeup_clear(void) {} static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/power/process.c b/kernel/power/process.c index 4ee194eb524b..7b323221b9ee 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -129,6 +129,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); + pm_wakeup_clear(); printk("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); -- cgit v1.2.3
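A usage sketch for the new interface (hypothetical driver code, not part of the patch): a wakeup-capable device's interrupt handler can abort a suspend in progress even when pm_wakeup_event()/pm_stay_awake() are not in use.

#include <linux/interrupt.h>
#include <linux/suspend.h>

static irqreturn_t foo_wake_handler(int irq, void *dev_id)
{
	/* Sets pm_abort_suspend and wakes suspend-to-idle, so the
	 * next pm_wakeup_pending() check terminates the transition.
	 */
	pm_system_wakeup();
	return IRQ_HANDLED;
}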
From cab303be91dc47942bc25de33dc1140123540800 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Aug 2014 11:44:31 +0200 Subject: genirq: Add sanity checks for PM options on shared interrupt lines Account the IRQF_NO_SUSPEND and IRQF_RESUME_EARLY actions on shared interrupt lines and yell loudly if there is a mismatch. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irqdesc.h | 10 ++++++++++ kernel/irq/internals.h | 10 ++++++++++ kernel/irq/manage.c | 4 ++++ kernel/irq/pm.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..cb1a31e448ae 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -36,6 +36,11 @@ struct irq_desc; * @threads_oneshot: bitfield to handle shared oneshot threads * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers + * @nr_actions: number of installed actions on this descriptor + * @no_suspend_depth: number of irqactions on an irq descriptor with + * IRQF_NO_SUSPEND set + * @force_resume_depth: number of irqactions on an irq descriptor with + * IRQF_FORCE_RESUME set * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -68,6 +73,11 @@ struct irq_desc { unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; +#ifdef CONFIG_PM_SLEEP + unsigned int nr_actions; + unsigned int no_suspend_depth; + unsigned int force_resume_depth; +#endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index af2821178900..c402502a5111 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -194,3 +194,13 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); } + +#ifdef CONFIG_PM_SLEEP +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); +#else +static inline void +irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } +static inline void +irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index fa564e8db996..0a9104b4608b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1200,6 +1200,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) new->irq = irq; *old_ptr = new; + irq_pm_install_action(desc, new); + /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -1318,6 +1320,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Found it - now remove it from the list of entries: */ *action_ptr = action->next; + irq_pm_remove_action(desc, action); + /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_shutdown(desc); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index b84141dcee5e..1b1b67a73218 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -13,6 +13,42 @@ #include "internals.h" +/* + * Called from __setup_irq() with desc->lock held after @action has + * been installed in the action chain.
+ */ +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions++; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth++; + + WARN_ON_ONCE(desc->force_resume_depth && + desc->force_resume_depth != desc->nr_actions); + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth++; + + WARN_ON_ONCE(desc->no_suspend_depth && + desc->no_suspend_depth != desc->nr_actions); +} + +/* + * Called from __free_irq() with desc->lock held after @action has + * been removed from the action chain. + */ +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions--; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth--; + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth--; +} + static void suspend_device_irq(struct irq_desc *desc, int irq) { if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) -- cgit v1.2.3 From b76f16748fa61801b1a1fd3ffb6f25ee228a35e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 13:54:09 +0200 Subject: genirq: Mark wakeup sources as armed on suspend This allows us to utilize this information in the irq_may_run() check without adding another conditional to the fast path. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irq.h | 8 ++++++++ kernel/irq/pm.c | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 62af59242ddc..03f48d936f66 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -173,6 +173,7 @@ struct irq_data { * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt + * IRQD_WAKEUP_ARMED - Wakeup mode armed */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -186,6 +187,7 @@ enum { IRQD_IRQ_DISABLED = (1 << 16), IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), + IRQD_WAKEUP_ARMED = (1 << 19), }; static inline bool irqd_is_setaffinity_pending(struct irq_data *d) @@ -257,6 +259,12 @@ static inline bool irqd_irq_inprogress(struct irq_data *d) return d->state_use_accessors & IRQD_IRQ_INPROGRESS; } +static inline bool irqd_is_wakeup_armed(struct irq_data *d) +{ + return d->state_use_accessors & IRQD_WAKEUP_ARMED; +} + + /* * Functions for chained handlers which can be enabled/disabled by the * standard disable_irq/enable_irq calls. Must be called with diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index cf0ce0163db9..766930eaeed9 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -54,6 +54,9 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; + if (irqd_is_wakeup_set(&desc->irq_data)) + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -101,6 +104,8 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc, int irq) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + if (desc->istate & IRQS_SUSPENDED) goto resume; -- cgit v1.2.3 From 9ce7a25849e80cfb264f4995f832b932c1987e1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 14:00:16 +0200 Subject: genirq: Simplify wakeup mechanism Currently we suspend wakeup interrupts by lazily disabling them and check later whether the interrupt has fired, but that's not sufficient for suspend to idle as there is no way to check that once we have transitioned into the CPU idle state.
So we change the mechanism in the following way: 1) Leave the wakeup interrupts enabled across suspend 2) Add a check to irq_may_run() which is called at the beginning of each flow handler whether the interrupt is an armed wakeup source. This check is basically free as it just extends the existing check for IRQD_IRQ_INPROGRESS. So no new conditional in the hot path. If the IRQD_WAKEUP_ARMED flag is set, then the interrupt is disabled, marked as pending/suspended and the pm core is notified about the wakeup event. Signed-off-by: Thomas Gleixner [ rjw: syscore.c and put irq_pm_check_wakeup() into pm.c ] Signed-off-by: Rafael J. Wysocki --- drivers/base/syscore.c | 7 +++--- include/linux/interrupt.h | 5 ----- kernel/irq/chip.c | 20 ++++++++++++++++- kernel/irq/internals.h | 2 ++ kernel/irq/pm.c | 55 +++++++++++++++++++++++++---------------------- 5 files changed, 53 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index dbb8350ea8dc..8d98a329f6ea 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include static LIST_HEAD(syscore_ops_list); @@ -54,9 +54,8 @@ int syscore_suspend(void) pr_debug("Checking wakeup interrupts\n"); /* Return error code if there are any wakeup interrupts pending. */ - ret = check_wakeup_irqs(); - if (ret) - return ret; + if (pm_wakeup_pending()) + return -EBUSY; WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core suspend.\n"); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 698ad053d064..69517a24bc50 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -193,11 +193,6 @@ extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); -#ifdef CONFIG_PM_SLEEP -extern int check_wakeup_irqs(void); -#else -static inline int check_wakeup_irqs(void) { return 0; } -#endif /** * struct irq_affinity_notify - context for notification of IRQ affinity changes diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6baf86085571..e7917ff8a486 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -344,8 +344,26 @@ static bool irq_check_poll(struct irq_desc *desc) static bool irq_may_run(struct irq_desc *desc) { - if (!irqd_irq_inprogress(&desc->irq_data)) + unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; + + /* + * If the interrupt is not in progress and is not an armed + * wakeup interrupt, proceed. + */ + if (!irqd_has_set(&desc->irq_data, mask)) return true; + + /* + * If the interrupt is an armed wakeup source, mark it pending + * and suspended, disable it and notify the pm core about the + * event. + */ + if (irq_pm_check_wakeup(desc)) + return false; + + /* + * Handle a potential concurrent poll on a different core. 
+ */ return irq_check_poll(desc); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c402502a5111..4332d766619d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -196,9 +196,11 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d } #ifdef CONFIG_PM_SLEEP +bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); #else +static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } static inline void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 766930eaeed9..3ca532592704 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -9,10 +9,24 @@ #include #include #include +#include #include #include "internals.h" +bool irq_pm_check_wakeup(struct irq_desc *desc) +{ + if (irqd_is_wakeup_armed(&desc->irq_data)) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; + desc->depth++; + irq_disable(desc); + pm_system_wakeup(); + return true; + } + return false; +} + /* * Called from __setup_irq() with desc->lock held after @action has * been installed in the action chain. @@ -54,8 +68,16 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; - if (irqd_is_wakeup_set(&desc->irq_data)) + if (irqd_is_wakeup_set(&desc->irq_data)) { irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + /* + * We return true here to force the caller to issue + * synchronize_irq(). We need to make sure that the + * IRQD_WAKEUP_ARMED is visible before we return from + * suspend_device_irqs(). + */ + return true; + } desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -79,9 +101,13 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) * for this purpose. * * So we disable all interrupts and mark them IRQS_SUSPENDED except - * for those which are unused and those which are marked as not + * for those which are unused, those which are marked as not * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND - * set. + * set and those which are marked as active wakeup sources. + * + * The active wakeup sources are handled by the flow handler entry + * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the + * interrupt and notifies the pm core about the wakeup. */ void suspend_device_irqs(void) { @@ -173,26 +199,3 @@ void resume_device_irqs(void) resume_irqs(false); } EXPORT_SYMBOL_GPL(resume_device_irqs); - -/** - * check_wakeup_irqs - check if any wake-up interrupts are pending - */ -int check_wakeup_irqs(void) -{ - struct irq_desc *desc; - int irq; - - for_each_irq_desc(irq, desc) { - /* - * Only interrupts which are marked as wakeup source - * and have not been disabled before the suspend check - * can abort suspend. - */ - if (irqd_is_wakeup_set(&desc->irq_data)) { - if (desc->depth == 1 && desc->istate & IRQS_PENDING) - return -EBUSY; - } - } - - return 0; -} -- cgit v1.2.3 From 10b3ad8c21bb4b135768c30dd4c51a1c744da699 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:07:24 -0700 Subject: net: Do txq_trans_update() in netdev_start_xmit() That way we don't have to audit every call site to make sure it is doing this properly. Signed-off-by: David S. 
Miller --- drivers/net/wan/dlci.c | 6 ++++-- include/linux/netdevice.h | 10 ++++++++-- net/core/dev.c | 7 ++----- net/core/netpoll.c | 4 +--- net/core/pktgen.c | 3 +-- net/packet/af_packet.c | 7 ++----- net/sched/sch_teql.c | 3 +-- 7 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 81b22a180aad..6427e8283419 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) { struct dlci_local *dlp = netdev_priv(dev); - if (skb) - netdev_start_xmit(skb, dlp->slave); + if (skb) { + struct netdev_queue *txq = skb_get_tx_queue(dev, skb); + netdev_start_xmit(skb, dlp->slave, txq); + } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 456eb1fe51e8..16171802ea7d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3437,11 +3437,17 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, return ops->ndo_start_xmit(skb, dev); } -static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; - return __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev); + if (rc == NETDEV_TX_OK) + txq_trans_update(txq); + + return rc; } int netdev_class_create_file_ns(struct class_attribute *class_attr, diff --git a/net/core/dev.c b/net/core/dev.c index a6077ef56345..6392adaaa22f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2666,10 +2666,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq); trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK) - txq_trans_update(txq); return rc; } @@ -2685,7 +2683,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = netdev_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev, txq); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2694,7 +2692,6 @@ gso: skb->next = nskb; return rc; } - txq_trans_update(txq); if (unlikely(netif_xmit_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 12b1df976562..05bc57edaa81 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); + status = netdev_start_xmit(skb, dev, txq); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d81b540096c3..34bd2ff9f121 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq); switch (ret) { case NETDEV_TX_OK: - txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b7a7f5a721bd..fe305a05a8fc 100644 --- a/net/packet/af_packet.c 
+++ b/net/packet/af_packet.c @@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = netdev_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) - txq_trans_update(txq); - } + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 64cd93ca8104..193dc2cba1ec 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,8 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { - txq_trans_update(slave_txq); + netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From fa2dbdc253c2aee2a760c64de454cb62469ec11d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:55:22 -0700 Subject: net: Pass a "more" indication down into netdev_start_xmit() code paths. For now it will always be false. Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- include/linux/netdevice.h | 9 +++++---- net/atm/mpc.c | 2 +- net/core/dev.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/packet/af_packet.c | 2 +- net/sched/sch_teql.c | 3 ++- 8 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 6427e8283419..ae6ecf401189 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -194,7 +194,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) if (skb) { struct netdev_queue *txq = skb_get_tx_queue(dev, skb); - netdev_start_xmit(skb, dlp->slave, txq); + netdev_start_xmit(skb, dlp->slave, txq, false); } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16171802ea7d..5050218c5b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3431,19 +3431,20 @@ int __init dev_proc_init(void); #endif static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev) + struct sk_buff *skb, struct net_device *dev, + bool more) { - skb->xmit_more = 0; + skb->xmit_more = more ? 
1 : 0; return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) + struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; int rc; - rc = __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d662da161e5a..0e982222d425 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return __netdev_start_xmit(mpc->old_ops, skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev, false); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index ab7bb809711e..f0ed5a611a97 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2610,7 +2610,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq); + rc = netdev_start_xmit(skb, dev, txq, false); trace_net_dev_xmit(skb, rc, dev, len); return rc; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 05bc57edaa81..e6645b4f330a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev, txq); + status = netdev_start_xmit(skb, dev, txq, false); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 34bd2ff9f121..5b36a9428c59 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,7 +3335,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev, txq); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fe305a05a8fc..87d20f48ff06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -259,7 +259,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) - ret = netdev_start_xmit(skb, dev, txq); + ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 193dc2cba1ec..aaa8d03ed054 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,7 +316,8 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave, slave_txq, false) == + NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From 50cbe9ab5f8d92d2d4a327b56e96559d8f63a1fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:13:51 -0700 Subject: net: Validate xmit SKBs right when we pull them out of the qdisc. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 6 +----- net/sched/sch_generic.c | 5 ++++- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5050218c5b7f..47c49ba2dcf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2827,6 +2827,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 704a5434f77d..75bc5b068a13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2656,7 +2656,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur return skb; } -static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2719,10 +2719,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { int rc = NETDEV_TX_OK; - skb = validate_xmit_skb(skb, dev); - if (!skb) - return rc; - if (likely(!skb->next)) return xmit_one(skb, dev, txq, false); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 05b3f5d104af..f178798a5836 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) + if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); + if (skb) + skb = validate_xmit_skb(skb, qdisc_dev(q)); + } } return skb; -- cgit v1.2.3 From ce93718fb7cdbc064c3000ff59e4d3200bdfa744 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:22:20 -0700 Subject: net: Don't keep around original SKB when we software segment GSO frames. Just maintain the list properly by returning the head of the remaining SKB list from dev_hard_start_xmit(). Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +-- net/core/dev.c | 79 +++++++++-------------------------------------- net/sched/sch_generic.c | 2 +- 3 files changed, 17 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47c49ba2dcf4..202c25a9aadf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2828,8 +2828,8 @@ int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq); +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 75bc5b068a13..c89da4f306b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 0; } -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - kfree_skb_list(skb->next); - skb->next = NULL; - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * @features: device features as applicable to this skb - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - struct sk_buff *segs; - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. */ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. 
*/ @@ -2682,8 +2636,13 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) features &= dev->hw_enc_features; if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb, features))) - goto out_kfree_skb; + struct sk_buff *segs; + + segs = skb_gso_segment(skb, features); + kfree_skb(skb); + if (IS_ERR(segs)) + segs = NULL; + skb = segs; } else { if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) @@ -2714,26 +2673,16 @@ out_null: return NULL; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret) { - int rc = NETDEV_TX_OK; - - if (likely(!skb->next)) - return xmit_one(skb, dev, txq, false); - - skb->next = xmit_list(skb->next, dev, txq, &rc); - if (likely(skb->next == NULL)) { - skb->destructor = DEV_GSO_CB(skb)->destructor; - consume_skb(skb); - return rc; + if (likely(!skb->next)) { + *ret = xmit_one(skb, dev, txq, false); + return skb; } - kfree_skb(skb); - - return rc; + return xmit_list(skb, dev, txq, ret); } -EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2945,7 +2894,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f178798a5836..a8bf9f9928bd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -129,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); -- cgit v1.2.3 From 5a21232983aa7acfe7fd26170832a9e0a4a7b4ae Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:41 -0700 Subject: net: Support for csum_bad in skbuff This flag indicates that an invalid checksum was detected in the packet. __skb_mark_checksum_bad helper function was added to set this. Checksums can be marked bad from a driver or the GRO path (the latter is implemented in this patch). csum_bad is checked in __skb_checksum_validate_complete (i.e. calling that when ip_summed == CHECKSUM_NONE). csum_bad works in conjunction with ip_summed value. In the case that ip_summed is CHECKSUM_NONE and csum_bad is set, this implies that the first (or next) checksum encountered in the packet is bad. When ip_summed is CHECKSUM_UNNECESSARY, the first checksum after the last one validated is bad. For example, if ip_summed == CHECKSUM_UNNECESSARY, csum_level == 1, and csum_bad is set-- then the third checksum in the packet is bad. In the normal path, the packet will be dropped when processing the protocol layer of the bad checksum: __skb_decr_checksum_unnecessary called twice for the good checksums changing ip_summed to CHECKSUM_NONE so that __skb_checksum_validate_complete is called to validate the third checksum and that will fail since csum_bad is set. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- include/linux/skbuff.h | 21 ++++++++++++++++++++- net/core/dev.c | 2 +- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 202c25a9aadf..a0ab6d9d400a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_gro_checksum_validate_complete(skb, \ compute_pseudo(skb, proto)); \ - if (!__ret) \ + if (__ret) \ + __skb_mark_checksum_bad(skb); \ + else \ skb_gro_incr_csum_unnecessary(skb); \ __ret; \ }) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c93b5859a772..23710a243439 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -617,7 +617,8 @@ struct sk_buff { kmemcheck_bitfield_begin(flags3); __u8 csum_level:2; - /* 14 bit hole */ + __u8 csum_bad:1; + /* 13 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) } } +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return 1; } skb->csum = psum; diff --git a/net/core/dev.c b/net/core/dev.c index 6857d57aa294..3774afc3bebf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) goto normal; gro_list_prepare(napi, skb); -- cgit v1.2.3 From d96535a17dbbafd567961d14c08c0984ddda9c3c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:42 -0700 Subject: net: Infrastructure for checksum unnecessary conversions For normal path, added skb_checksum_try_convert which is called to attempt to convert CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE. The primary condition to allow this is that ip_summed is CHECKSUM_NONE and csum_valid is true, which will be the state after consuming a CHECKSUM_UNNECESSARY. For GRO path, added skb_gro_checksum_try_convert which is the GRO analogue of skb_checksum_try_convert. The primary condition to allow this is that NAPI_GRO_CB(skb)->csum_cnt == 0 and NAPI_GRO_CB(skb)->csum_valid is set. This implies that we have consumed all available CHECKSUM_UNNECESSARY checksums in the GRO path. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ include/linux/skbuff.h | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a0ab6d9d400a..5be20a7bbb0d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2233,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) #define skb_gro_checksum_simple_validate(skb) \ __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23710a243439..02529fcad1ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2942,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3 From 2abb7cdc0dc84e99b76ef983a1ae1978922aa9b3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:43 -0700 Subject: udp: Add support for doing checksum unnecessary conversion Add support for doing CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion in UDP tunneling path. In the normal UDP path, we call skb_checksum_try_convert after locating the UDP socket. The check is that checksum conversion is enabled for the socket (new flag in UDP socket) and that checksum field is non-zero. In the UDP GRO path, we call skb_gro_checksum_try_convert after checksum is validated and checksum field is non-zero. Since this is already in GRO we assume that checksum conversion is always wanted. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/udp.h | 16 +++++++++++++++- net/ipv4/udp.c | 4 ++++ net/ipv4/udp_offload.c | 25 +++++++++++++++++-------- net/ipv6/udp.c | 4 ++++ net/ipv6/udp_offload.c | 24 +++++++++++++++++------- 5 files changed, 57 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..ee3277593222 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,11 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + convert_csum:1;/* On receive, convert checksum + * unnecessary to checksum complete + * if possible. + */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_set_convert_csum(struct sock *sk, bool val) +{ + udp_sk(sk)->convert_csum = val; +} + +static inline bool udp_get_convert_csum(struct sock *sk) +{ + return udp_sk(sk)->convert_csum; +} + #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3549c21fe5f7..0da3849fd35b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a6adff98382a..84e0e05c9c0e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - inet_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo); +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp_gro_complete(struct sk_buff *skb, int nhoff) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 12fcce8fba46..f6ba535b6feb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b13e377e9c53..89cb9a9b8537 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - ip6_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo); + +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp6_gro_complete(struct sk_buff *skb, int nhoff) -- cgit v1.2.3 From fbff66108352d19b5cffa7dce26d7638c9dd4d70 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 28 Aug 2014 04:43:09 -0700 Subject: security: Silence shadow warning Renaming an unused formal parameter in the static inline function security_inode_init_security eliminates many W=2 warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..3b3aeb1b74cb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2108,7 +2108,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, static inline int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, - const initxattrs initxattrs, + const initxattrs xattrs, void *fs_data) { return 0; -- cgit v1.2.3 From ea2fdf842365066c82ab941086c6a1741ced4f2a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 28 Aug 2014 13:58:53 +0200 Subject: usb: phy: samsung: remove old common USB PHY code drivers/usb/phy/phy-samsung-usb[2,3] drivers got replaced by drivers/phy/phy-samsung-usb[2,3] ones and the old common Samsung USB PHY code is no longer used. 
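A note on the checksum-conversion series above: from a tunnel driver's point of view the whole mechanism reduces to a single opt-in call on the encapsulation socket, after which the patched __udp4_lib_rcv()/__udp6_lib_rcv() and GRO paths perform the conversion automatically. A minimal sketch, assuming a driver-private setup helper (the helper name is invented; udp_set_convert_csum() is the API added above):

    #include <linux/udp.h>

    /* Opt a kernel UDP encapsulation socket in to having
     * CHECKSUM_UNNECESSARY upgraded to CHECKSUM_COMPLETE on receive.
     * Datagrams with a zero UDP checksum are left alone, since they
     * carry no checksum value that could be folded into skb->csum.
     */
    static void my_tunnel_setup_sock(struct sock *sk)
    {
            udp_set_convert_csum(sk, true);
    }

The payoff comes further up the stack: even when the NIC only reported CHECKSUM_UNNECESSARY, an inner protocol behind the tunnel header still finds a usable skb->csum to validate against.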
Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Kyungmin Park Reviewed-by: Vivek Gautam Reviewed-by: Jingoo Han Acked-by: Kishon Vijay Abraham I Cc: Kamil Debski Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-samsung-usb.c | 241 -------------------- drivers/usb/phy/phy-samsung-usb.h | 317 --------------------------- include/linux/platform_data/samsung-usbphy.h | 27 --- 3 files changed, 585 deletions(-) delete mode 100644 drivers/usb/phy/phy-samsung-usb.c delete mode 100644 drivers/usb/phy/phy-samsung-usb.h delete mode 100644 include/linux/platform_data/samsung-usbphy.h (limited to 'include/linux') diff --git a/drivers/usb/phy/phy-samsung-usb.c b/drivers/usb/phy/phy-samsung-usb.c deleted file mode 100644 index ac025ca08425..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.c +++ /dev/null @@ -1,241 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Author: Praveen Paneri - * - * Samsung USB-PHY helper driver with common function calls; - * interacts with Samsung USB 2.0 PHY controller driver and later - * with Samsung USB 3.0 PHY driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "phy-samsung-usb.h" - -int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy) -{ - struct device_node *usbphy_sys; - - /* Getting node for system controller interface for usb-phy */ - usbphy_sys = of_get_child_by_name(sphy->dev->of_node, "usbphy-sys"); - if (!usbphy_sys) { - dev_err(sphy->dev, "No sys-controller interface for usb-phy\n"); - return -ENODEV; - } - - sphy->pmuregs = of_iomap(usbphy_sys, 0); - - if (sphy->pmuregs == NULL) { - dev_err(sphy->dev, "Can't get usb-phy pmu control register\n"); - goto err0; - } - - sphy->sysreg = of_iomap(usbphy_sys, 1); - - /* - * Not returning error code here, since this situation is not fatal. - * Few SoCs may not have this switch available - */ - if (sphy->sysreg == NULL) - dev_warn(sphy->dev, "Can't get usb-phy sysreg cfg register\n"); - - of_node_put(usbphy_sys); - - return 0; - -err0: - of_node_put(usbphy_sys); - return -ENXIO; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_parse_dt); - -/* - * Set isolation here for phy. - * Here 'on = true' would mean USB PHY block is isolated, hence - * de-activated and vice-versa. 
- */ -void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, bool on) -{ - void __iomem *reg = NULL; - u32 reg_val; - u32 en_mask = 0; - - if (!sphy->pmuregs) { - dev_warn(sphy->dev, "Can't set pmu isolation\n"); - return; - } - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) { - reg = sphy->pmuregs + sphy->drv_data->devphy_reg_offset; - en_mask = sphy->drv_data->devphy_en_mask; - } else if (sphy->phy_type == USB_PHY_TYPE_HOST) { - reg = sphy->pmuregs + sphy->drv_data->hostphy_reg_offset; - en_mask = sphy->drv_data->hostphy_en_mask; - } - - reg_val = readl(reg); - - if (on) - reg_val &= ~en_mask; - else - reg_val |= en_mask; - - writel(reg_val, reg); - - if (sphy->drv_data->cpu_type == TYPE_EXYNOS4X12) { - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL0); - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL1); - } -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_isolation_4210); - -/* - * Configure the mode of working of usb-phy here: HOST/DEVICE. - */ -void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy) -{ - u32 reg; - - if (!sphy->sysreg) { - dev_warn(sphy->dev, "Can't configure specified phy mode\n"); - return; - } - - reg = readl(sphy->sysreg); - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) - reg &= ~EXYNOS_USB20PHY_CFG_HOST_LINK; - else if (sphy->phy_type == USB_PHY_TYPE_HOST) - reg |= EXYNOS_USB20PHY_CFG_HOST_LINK; - - writel(reg, sphy->sysreg); -} -EXPORT_SYMBOL_GPL(samsung_usbphy_cfg_sel); - -/* - * PHYs are different for USB Device and USB Host. - * This make sure that correct PHY type is selected before - * any operation on PHY. - */ -int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type) -{ - struct samsung_usbphy *sphy = phy_to_sphy(phy); - - sphy->phy_type = phy_type; - - return 0; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_type); - -int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 12 * MHZ: - clksel = PHYCLK_CLKSEL_12M; - break; - case 24 * MHZ: - clksel = PHYCLK_CLKSEL_24M; - break; - case 48 * MHZ: - clksel = PHYCLK_CLKSEL_48M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_64xx); - -int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 9600 * KHZ: - clksel = FSEL_CLKSEL_9600K; - break; - case 10 * MHZ: - clksel = FSEL_CLKSEL_10M; - break; - case 12 * MHZ: - clksel = FSEL_CLKSEL_12M; - break; - case 19200 * KHZ: - clksel = FSEL_CLKSEL_19200K; - break; - case 20 * MHZ: - clksel = FSEL_CLKSEL_20M; - break; - case 24 * MHZ: - clksel = FSEL_CLKSEL_24M; - break; - case 50 * MHZ: - clksel = FSEL_CLKSEL_50M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_4x12); - -/* - * Returns reference clock frequency selection value - */ -int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy) -{ - struct clk *ref_clk; - unsigned long rate; - int refclk_freq; - - /* - * In exynos5250 USB host and device PHY use - * external crystal clock XXTI - */ - if (sphy->drv_data->cpu_type == TYPE_EXYNOS5250) - ref_clk = clk_get(sphy->dev, "ext_xtal"); - else - ref_clk = clk_get(sphy->dev, "xusbxti"); - if (IS_ERR(ref_clk)) { - dev_err(sphy->dev, "Failed to get reference clock\n"); - return 
PTR_ERR(ref_clk); - } - - rate = clk_get_rate(ref_clk); - refclk_freq = sphy->drv_data->rate_to_clksel(sphy, rate); - - clk_put(ref_clk); - - return refclk_freq; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_get_refclk_freq); diff --git a/drivers/usb/phy/phy-samsung-usb.h b/drivers/usb/phy/phy-samsung-usb.h deleted file mode 100644 index 4eef45555971..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.h +++ /dev/null @@ -1,317 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Samsung USB-PHY transceiver; talks to S3C HS OTG controller, EHCI-S5P and - * OHCI-EXYNOS controllers. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include - -/* Register definitions */ - -#define SAMSUNG_PHYPWR (0x00) -#define PHYPWR_NORMAL_MASK (0x19 << 0) -#define PHYPWR_OTG_DISABLE (0x1 << 4) -#define PHYPWR_ANALOG_POWERDOWN (0x1 << 3) -#define PHYPWR_FORCE_SUSPEND (0x1 << 1) -/* For Exynos4 */ -#define PHYPWR_NORMAL_MASK_PHY0 (0x39 << 0) -#define PHYPWR_SLEEP_PHY0 (0x1 << 5) - -#define SAMSUNG_PHYCLK (0x04) -#define PHYCLK_MODE_USB11 (0x1 << 6) -#define PHYCLK_EXT_OSC (0x1 << 5) -#define PHYCLK_COMMON_ON_N (0x1 << 4) -#define PHYCLK_ID_PULL (0x1 << 2) -#define PHYCLK_CLKSEL_MASK (0x3 << 0) -#define PHYCLK_CLKSEL_48M (0x0 << 0) -#define PHYCLK_CLKSEL_12M (0x2 << 0) -#define PHYCLK_CLKSEL_24M (0x3 << 0) - -#define SAMSUNG_RSTCON (0x08) -#define RSTCON_PHYLINK_SWRST (0x1 << 2) -#define RSTCON_HLINK_SWRST (0x1 << 1) -#define RSTCON_SWRST (0x1 << 0) - -/* EXYNOS4X12 */ -#define EXYNOS4X12_PHY_HSIC_CTRL0 (0x04) -#define EXYNOS4X12_PHY_HSIC_CTRL1 (0x08) -#define PHYPWR_NORMAL_MASK_HSIC1 (0x7 << 12) -#define PHYPWR_NORMAL_MASK_HSIC0 (0x7 << 9) -#define PHYPWR_NORMAL_MASK_PHY1 (0x7 << 6) -#define RSTCON_HOSTPHY_SWRST (0xf << 3) - -/* EXYNOS5 */ -#define EXYNOS5_PHY_HOST_CTRL0 (0x00) -#define HOST_CTRL0_PHYSWRSTALL (0x1 << 31) -#define HOST_CTRL0_REFCLKSEL_MASK (0x3 << 19) -#define HOST_CTRL0_REFCLKSEL_XTAL (0x0 << 19) -#define HOST_CTRL0_REFCLKSEL_EXTL (0x1 << 19) -#define HOST_CTRL0_REFCLKSEL_CLKCORE (0x2 << 19) -#define HOST_CTRL0_FSEL_MASK (0x7 << 16) -#define HOST_CTRL0_FSEL(_x) ((_x) << 16) -#define FSEL_CLKSEL_50M (0x7) -#define FSEL_CLKSEL_24M (0x5) -#define FSEL_CLKSEL_20M (0x4) -#define FSEL_CLKSEL_19200K (0x3) -#define FSEL_CLKSEL_12M (0x2) -#define FSEL_CLKSEL_10M (0x1) -#define FSEL_CLKSEL_9600K (0x0) -#define HOST_CTRL0_TESTBURNIN (0x1 << 11) -#define HOST_CTRL0_RETENABLE (0x1 << 10) -#define HOST_CTRL0_COMMONON_N (0x1 << 9) -#define HOST_CTRL0_SIDDQ (0x1 << 6) -#define HOST_CTRL0_FORCESLEEP (0x1 << 5) -#define HOST_CTRL0_FORCESUSPEND (0x1 << 4) -#define HOST_CTRL0_WORDINTERFACE (0x1 << 3) -#define HOST_CTRL0_UTMISWRST (0x1 << 2) -#define HOST_CTRL0_LINKSWRST (0x1 << 1) -#define HOST_CTRL0_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_TUNE0 (0x04) - -#define EXYNOS5_PHY_HSIC_CTRL1 (0x10) - -#define EXYNOS5_PHY_HSIC_TUNE1 (0x14) - -#define EXYNOS5_PHY_HSIC_CTRL2 (0x20) - -#define EXYNOS5_PHY_HSIC_TUNE2 (0x24) -#define HSIC_CTRL_REFCLKSEL_MASK (0x3 << 23) -#define HSIC_CTRL_REFCLKSEL (0x2 << 23) -#define 
HSIC_CTRL_REFCLKDIV_MASK (0x7f << 16) -#define HSIC_CTRL_REFCLKDIV(_x) ((_x) << 16) -#define HSIC_CTRL_REFCLKDIV_12 (0x24 << 16) -#define HSIC_CTRL_REFCLKDIV_15 (0x1c << 16) -#define HSIC_CTRL_REFCLKDIV_16 (0x1a << 16) -#define HSIC_CTRL_REFCLKDIV_19_2 (0x15 << 16) -#define HSIC_CTRL_REFCLKDIV_20 (0x14 << 16) -#define HSIC_CTRL_SIDDQ (0x1 << 6) -#define HSIC_CTRL_FORCESLEEP (0x1 << 5) -#define HSIC_CTRL_FORCESUSPEND (0x1 << 4) -#define HSIC_CTRL_WORDINTERFACE (0x1 << 3) -#define HSIC_CTRL_UTMISWRST (0x1 << 2) -#define HSIC_CTRL_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_EHCICTRL (0x30) -#define HOST_EHCICTRL_ENAINCRXALIGN (0x1 << 29) -#define HOST_EHCICTRL_ENAINCR4 (0x1 << 28) -#define HOST_EHCICTRL_ENAINCR8 (0x1 << 27) -#define HOST_EHCICTRL_ENAINCR16 (0x1 << 26) - -#define EXYNOS5_PHY_HOST_OHCICTRL (0x34) -#define HOST_OHCICTRL_SUSPLGCY (0x1 << 3) -#define HOST_OHCICTRL_APPSTARTCLK (0x1 << 2) -#define HOST_OHCICTRL_CNTSEL (0x1 << 1) -#define HOST_OHCICTRL_CLKCKTRST (0x1 << 0) - -#define EXYNOS5_PHY_OTG_SYS (0x38) -#define OTG_SYS_PHYLINK_SWRESET (0x1 << 14) -#define OTG_SYS_LINKSWRST_UOTG (0x1 << 13) -#define OTG_SYS_PHY0_SWRST (0x1 << 12) -#define OTG_SYS_REFCLKSEL_MASK (0x3 << 9) -#define OTG_SYS_REFCLKSEL_XTAL (0x0 << 9) -#define OTG_SYS_REFCLKSEL_EXTL (0x1 << 9) -#define OTG_SYS_REFCLKSEL_CLKCORE (0x2 << 9) -#define OTG_SYS_IDPULLUP_UOTG (0x1 << 8) -#define OTG_SYS_COMMON_ON (0x1 << 7) -#define OTG_SYS_FSEL_MASK (0x7 << 4) -#define OTG_SYS_FSEL(_x) ((_x) << 4) -#define OTG_SYS_FORCESLEEP (0x1 << 3) -#define OTG_SYS_OTGDISABLE (0x1 << 2) -#define OTG_SYS_SIDDQ_UOTG (0x1 << 1) -#define OTG_SYS_FORCESUSPEND (0x1 << 0) - -#define EXYNOS5_PHY_OTG_TUNE (0x40) - -/* EXYNOS5: USB 3.0 DRD */ -#define EXYNOS5_DRD_LINKSYSTEM (0x04) -#define LINKSYSTEM_FLADJ_MASK (0x3f << 1) -#define LINKSYSTEM_FLADJ(_x) ((_x) << 1) -#define LINKSYSTEM_XHCI_VERSION_CONTROL (0x1 << 27) - -#define EXYNOS5_DRD_PHYUTMI (0x08) -#define PHYUTMI_OTGDISABLE (0x1 << 6) -#define PHYUTMI_FORCESUSPEND (0x1 << 1) -#define PHYUTMI_FORCESLEEP (0x1 << 0) - -#define EXYNOS5_DRD_PHYPIPE (0x0c) - -#define EXYNOS5_DRD_PHYCLKRST (0x10) -#define PHYCLKRST_SSC_REFCLKSEL_MASK (0xff << 23) -#define PHYCLKRST_SSC_REFCLKSEL(_x) ((_x) << 23) -#define PHYCLKRST_SSC_RANGE_MASK (0x03 << 21) -#define PHYCLKRST_SSC_RANGE(_x) ((_x) << 21) -#define PHYCLKRST_SSC_EN (0x1 << 20) -#define PHYCLKRST_REF_SSP_EN (0x1 << 19) -#define PHYCLKRST_REF_CLKDIV2 (0x1 << 18) -#define PHYCLKRST_MPLL_MULTIPLIER_MASK (0x7f << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF (0x19 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF (0x02 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF (0x68 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF (0x7d << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF (0x02 << 11) -#define PHYCLKRST_FSEL_MASK (0x3f << 5) -#define PHYCLKRST_FSEL(_x) ((_x) << 5) -#define PHYCLKRST_FSEL_PAD_100MHZ (0x27 << 5) -#define PHYCLKRST_FSEL_PAD_24MHZ (0x2a << 5) -#define PHYCLKRST_FSEL_PAD_20MHZ (0x31 << 5) -#define PHYCLKRST_FSEL_PAD_19_2MHZ (0x38 << 5) -#define PHYCLKRST_RETENABLEN (0x1 << 4) -#define PHYCLKRST_REFCLKSEL_MASK (0x03 << 2) -#define PHYCLKRST_REFCLKSEL_PAD_REFCLK (0x2 << 2) -#define PHYCLKRST_REFCLKSEL_EXT_REFCLK (0x3 << 2) -#define PHYCLKRST_PORTRESET (0x1 << 1) -#define PHYCLKRST_COMMONONN (0x1 << 0) - -#define EXYNOS5_DRD_PHYREG0 (0x14) - -#define EXYNOS5_DRD_PHYREG1 (0x18) - -#define EXYNOS5_DRD_PHYPARAM0 (0x1c) -#define PHYPARAM0_REF_USE_PAD (0x1 << 31) -#define PHYPARAM0_REF_LOSLEVEL_MASK (0x1f << 26) -#define 
PHYPARAM0_REF_LOSLEVEL (0x9 << 26) - -#define EXYNOS5_DRD_PHYPARAM1 (0x20) -#define PHYPARAM1_PCS_TXDEEMPH_MASK (0x1f << 0) -#define PHYPARAM1_PCS_TXDEEMPH (0x1c) - -#define EXYNOS5_DRD_PHYTERM (0x24) - -#define EXYNOS5_DRD_PHYTEST (0x28) -#define PHYTEST_POWERDOWN_SSP (0x1 << 3) -#define PHYTEST_POWERDOWN_HSP (0x1 << 2) - -#define EXYNOS5_DRD_PHYADP (0x2c) - -#define EXYNOS5_DRD_PHYBATCHG (0x30) -#define PHYBATCHG_UTMI_CLKSEL (0x1 << 2) - -#define EXYNOS5_DRD_PHYRESUME (0x34) - -#define EXYNOS5_DRD_LINKPORT (0x44) - -#ifndef MHZ -#define MHZ (1000*1000) -#endif - -#ifndef KHZ -#define KHZ (1000) -#endif - -#define EXYNOS_USBHOST_PHY_CTRL_OFFSET (0x4) -#define S3C64XX_USBPHY_ENABLE (0x1 << 16) -#define EXYNOS_USBPHY_ENABLE (0x1 << 0) -#define EXYNOS_USB20PHY_CFG_HOST_LINK (0x1 << 0) - -enum samsung_cpu_type { - TYPE_S3C64XX, - TYPE_EXYNOS4210, - TYPE_EXYNOS4X12, - TYPE_EXYNOS5250, -}; - -struct samsung_usbphy; - -/* - * struct samsung_usbphy_drvdata - driver data for various SoC variants - * @cpu_type: machine identifier - * @devphy_en_mask: device phy enable mask for PHY CONTROL register - * @hostphy_en_mask: host phy enable mask for PHY CONTROL register - * @devphy_reg_offset: offset to DEVICE PHY CONTROL register from - * mapped address of system controller. - * @hostphy_reg_offset: offset to HOST PHY CONTROL register from - * mapped address of system controller. - * - * Here we have a separate mask for device type phy. - * Having different masks for host and device type phy helps - * in setting independent masks in case of SoCs like S5PV210, - * in which PHY0 and PHY1 enable bits belong to same register - * placed at position 0 and 1 respectively. - * Although for newer SoCs like exynos these bits belong to - * different registers altogether placed at position 0. 
- */ -struct samsung_usbphy_drvdata { - int cpu_type; - int devphy_en_mask; - int hostphy_en_mask; - u32 devphy_reg_offset; - u32 hostphy_reg_offset; - int (*rate_to_clksel)(struct samsung_usbphy *, unsigned long); - void (*set_isolation)(struct samsung_usbphy *, bool); - void (*phy_enable)(struct samsung_usbphy *); - void (*phy_disable)(struct samsung_usbphy *); -}; - -/* - * struct samsung_usbphy - transceiver driver state - * @phy: transceiver structure - * @plat: platform data - * @dev: The parent device supplied to the probe function - * @clk: usb phy clock - * @regs: usb phy controller registers memory base - * @pmuregs: USB device PHY_CONTROL register memory base - * @sysreg: USB2.0 PHY_CFG register memory base - * @ref_clk_freq: reference clock frequency selection - * @drv_data: driver data available for different SoCs - * @phy_type: Samsung SoCs specific phy types: #HOST - * #DEVICE - * @phy_usage: usage count for phy - * @lock: lock for phy operations - */ -struct samsung_usbphy { - struct usb_phy phy; - struct samsung_usbphy_data *plat; - struct device *dev; - struct clk *clk; - void __iomem *regs; - void __iomem *pmuregs; - void __iomem *sysreg; - int ref_clk_freq; - const struct samsung_usbphy_drvdata *drv_data; - enum samsung_usb_phy_type phy_type; - atomic_t phy_usage; - spinlock_t lock; -}; - -#define phy_to_sphy(x) container_of((x), struct samsung_usbphy, phy) - -static const struct of_device_id samsung_usbphy_dt_match[]; - -static inline const struct samsung_usbphy_drvdata -*samsung_usbphy_get_driver_data(struct platform_device *pdev) -{ - if (pdev->dev.of_node) { - const struct of_device_id *match; - match = of_match_node(samsung_usbphy_dt_match, - pdev->dev.of_node); - return match->data; - } - - return (struct samsung_usbphy_drvdata *) - platform_get_device_id(pdev)->driver_data; -} - -extern int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy); -extern void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, - bool on); -extern void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy); -extern int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type); -extern int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy); -extern int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate); -extern int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate); diff --git a/include/linux/platform_data/samsung-usbphy.h b/include/linux/platform_data/samsung-usbphy.h deleted file mode 100644 index 1bd24cba982b..000000000000 --- a/include/linux/platform_data/samsung-usbphy.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * http://www.samsung.com/ - * Author: Praveen Paneri - * - * Defines platform data for samsung usb phy driver. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __SAMSUNG_USBPHY_PLATFORM_H -#define __SAMSUNG_USBPHY_PLATFORM_H - -/** - * samsung_usbphy_data - Platform data for USB PHY driver. - * @pmu_isolation: Function to control usb phy isolation in PMU. 
- */ -struct samsung_usbphy_data { - void (*pmu_isolation)(int on); -}; - -extern void samsung_usbphy_set_pdata(struct samsung_usbphy_data *pd); - -#endif /* __SAMSUNG_USBPHY_PLATFORM_H */ -- cgit v1.2.3 From 5835d96e9ce4efdba8c6cefffc2f1575925456de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:04 -0400 Subject: percpu: implement [__]alloc_percpu_gfp() Now that pcpu_alloc_area() can allocate only from populated areas, it's easy to add atomic allocation support to [__]alloc_percpu(). Update pcpu_alloc() so that it accepts @gfp and skips all the blocking operations and allocates only from the populated areas if @gfp doesn't contain GFP_KERNEL. New interface functions [__]alloc_percpu_gfp() are added. While this means that atomic allocations are possible, this isn't complete yet as there's no mechanism to ensure that certain amount of populated areas is kept available and atomic allocations may keep failing under certain conditions. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 9 +++++-- mm/percpu.c | 64 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 6f61b61b7996..d1b416da25ed 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -122,11 +122,16 @@ extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); -#define alloc_percpu(type) \ - (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) +#define alloc_percpu_gfp(type, gfp) \ + (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ + __alignof__(type), gfp) +#define alloc_percpu(type) \ + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index 577d84fb3002..c52b93117dc2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -151,11 +151,6 @@ static struct pcpu_chunk *pcpu_first_chunk; static struct pcpu_chunk *pcpu_reserved_chunk; static int pcpu_reserved_chunk_limit; -/* - * Free path accesses and alters only the index data structures and can be - * safely called from atomic context. When memory needs to be returned to - * the system, free path schedules reclaim_work. - */ static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ @@ -727,20 +722,21 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available + * @gfp: allocation flags * - * Allocate percpu area of @size bytes aligned at @align. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't + * contain %GFP_KERNEL, the allocation is atomic. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. 
*/ -static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, + gfp_t gfp) { static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; + bool is_atomic = !(gfp & GFP_KERNEL); int slot, off, new_alloc, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -773,14 +769,15 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) while ((new_alloc = pcpu_need_to_extend(chunk))) { spin_unlock_irqrestore(&pcpu_lock, flags); - if (pcpu_extend_area_map(chunk, new_alloc) < 0) { + if (is_atomic || + pcpu_extend_area_map(chunk, new_alloc) < 0) { err = "failed to extend area map of reserved chunk"; goto fail; } spin_lock_irqsave(&pcpu_lock, flags); } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; @@ -797,6 +794,8 @@ restart: new_alloc = pcpu_need_to_extend(chunk); if (new_alloc) { + if (is_atomic) + continue; spin_unlock_irqrestore(&pcpu_lock, flags); if (pcpu_extend_area_map(chunk, new_alloc) < 0) { @@ -811,7 +810,7 @@ restart: goto restart; } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; } @@ -824,6 +823,9 @@ restart: * tasks to create chunks simultaneously. Serialize and create iff * there's still no empty chunk after grabbing the mutex. */ + if (is_atomic) + goto fail; + mutex_lock(&pcpu_alloc_mutex); if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { @@ -846,7 +848,7 @@ area_found: spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ - if (true) { + if (!is_atomic) { int page_start, page_end, rs, re; mutex_lock(&pcpu_alloc_mutex); @@ -884,9 +886,9 @@ area_found: fail_unlock: spin_unlock_irqrestore(&pcpu_lock, flags); fail: - if (warn_limit) { - pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " - "%s\n", size, align, err); + if (!is_atomic && warn_limit) { + pr_warning("PERCPU: allocation failed, size=%zu align=%zu atomic=%d, %s\n", + size, align, is_atomic, err); dump_stack(); if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); @@ -895,22 +897,34 @@ fail: } /** - * __alloc_percpu - allocate dynamic percpu area + * __alloc_percpu_gfp - allocate dynamic percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @gfp: allocation flags * - * Allocate zero-filled percpu area of @size bytes aligned at @align. - * Might sleep. Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate zero-filled percpu area of @size bytes aligned at @align. If + * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can + * be called from any context but is a lot more likely to fail. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ +void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) +{ + return pcpu_alloc(size, align, false, gfp); +} +EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). 
+ */ void __percpu *__alloc_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, false); + return pcpu_alloc(size, align, false, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); @@ -932,7 +946,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); */ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, true); + return pcpu_alloc(size, align, true, GFP_KERNEL); } /** -- cgit v1.2.3 From 1a4d76076cda69b0abf15463a8cebc172406da25 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:05 -0400 Subject: percpu: implement asynchronous chunk population The percpu allocator now supports atomic allocations by only allocating from already populated areas but the mechanism to ensure that there's adequate amount of populated areas was missing. This patch expands pcpu_balance_work so that in addition to freeing excess free chunks it also populates chunks to maintain an adequate level of populated areas. pcpu_alloc() schedules pcpu_balance_work if the amount of free populated areas is too low or after an atomic allocation failure. * PERPCU_DYNAMIC_RESERVE is increased by two pages to account for PCPU_EMPTY_POP_PAGES_LOW. * pcpu_async_enabled is added to gate both async jobs - chunk->map_extend_work and pcpu_balance_work - so that we don't end up scheduling them while the needed subsystems aren't up yet. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 +- mm/percpu.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d1b416da25ed..a3aa63e47637 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -48,9 +48,9 @@ * intelligent way to determine this would be nice. */ #if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#define PERCPU_DYNAMIC_RESERVE (28 << 10) #else -#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#define PERCPU_DYNAMIC_RESERVE (20 << 10) #endif extern void *pcpu_base_addr; diff --git a/mm/percpu.c b/mm/percpu.c index 28a830590b4c..867efd38d879 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -78,6 +78,8 @@ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_ATOMIC_MAP_MARGIN_LOW 32 #define PCPU_ATOMIC_MAP_MARGIN_HIGH 64 +#define PCPU_EMPTY_POP_PAGES_LOW 2 +#define PCPU_EMPTY_POP_PAGES_HIGH 4 #ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ @@ -168,9 +170,22 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ */ static int pcpu_nr_empty_pop_pages; -/* balance work is used to populate or destroy chunks asynchronously */ +/* + * Balance work is used to populate or destroy chunks asynchronously. We + * try to keep the number of populated free pages between + * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one + * empty chunk. 
+ */ static void pcpu_balance_workfn(struct work_struct *work); static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); +static bool pcpu_async_enabled __read_mostly; +static bool pcpu_atomic_alloc_failed; + +static void pcpu_schedule_balance_work(void) +{ + if (pcpu_async_enabled) + schedule_work(&pcpu_balance_work); +} static bool pcpu_addr_in_first_chunk(void *addr) { @@ -386,7 +401,8 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic) margin = 3; if (chunk->map_alloc < - chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) + chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && + pcpu_async_enabled) schedule_work(&chunk->map_extend_work); } else { margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; @@ -1005,6 +1021,9 @@ area_found: if (chunk != pcpu_reserved_chunk) pcpu_nr_empty_pop_pages -= occ_pages; + if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + pcpu_schedule_balance_work(); + /* clear the areas and return address relative to base address */ for_each_possible_cpu(cpu) memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); @@ -1023,6 +1042,11 @@ fail: if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); } + if (is_atomic) { + /* see the flag handling in pcpu_blance_workfn() */ + pcpu_atomic_alloc_failed = true; + pcpu_schedule_balance_work(); + } return NULL; } @@ -1080,7 +1104,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) } /** - * pcpu_balance_workfn - reclaim fully free chunks, workqueue function + * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * * Reclaim all fully free chunks except for the first one. @@ -1090,7 +1114,12 @@ static void pcpu_balance_workfn(struct work_struct *work) LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; + int slot, nr_to_pop, ret; + /* + * There's no reason to keep around multiple unused chunks and VM + * areas can be scarce. Destroy all free chunks except for one. + */ mutex_lock(&pcpu_alloc_mutex); spin_lock_irq(&pcpu_lock); @@ -1118,6 +1147,74 @@ static void pcpu_balance_workfn(struct work_struct *work) pcpu_destroy_chunk(chunk); } + /* + * Ensure there are certain number of free populated pages for + * atomic allocs. Fill up from the most packed so that atomic + * allocs don't increase fragmentation. If atomic allocation + * failed previously, always populate the maximum amount. This + * should prevent atomic allocs larger than PAGE_SIZE from keeping + * failing indefinitely; however, large atomic allocs are not + * something we support properly and can be highly unreliable and + * inefficient. 
+ */ +retry_pop: + if (pcpu_atomic_alloc_failed) { + nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH; + /* best effort anyway, don't worry about synchronization */ + pcpu_atomic_alloc_failed = false; + } else { + nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - + pcpu_nr_empty_pop_pages, + 0, PCPU_EMPTY_POP_PAGES_HIGH); + } + + for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) { + int nr_unpop = 0, rs, re; + + if (!nr_to_pop) + break; + + spin_lock_irq(&pcpu_lock); + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + nr_unpop = pcpu_unit_pages - chunk->nr_populated; + if (nr_unpop) + break; + } + spin_unlock_irq(&pcpu_lock); + + if (!nr_unpop) + continue; + + /* @chunk can't go away while pcpu_alloc_mutex is held */ + pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) { + int nr = min(re - rs, nr_to_pop); + + ret = pcpu_populate_chunk(chunk, rs, rs + nr); + if (!ret) { + nr_to_pop -= nr; + spin_lock_irq(&pcpu_lock); + pcpu_chunk_populated(chunk, rs, rs + nr); + spin_unlock_irq(&pcpu_lock); + } else { + nr_to_pop = 0; + } + + if (!nr_to_pop) + break; + } + } + + if (nr_to_pop) { + /* ran out of chunks to populate, create a new one and retry */ + chunk = pcpu_create_chunk(); + if (chunk) { + spin_lock_irq(&pcpu_lock); + pcpu_chunk_relocate(chunk, -1); + spin_unlock_irq(&pcpu_lock); + goto retry_pop; + } + } + mutex_unlock(&pcpu_alloc_mutex); } @@ -1160,7 +1257,7 @@ void free_percpu(void __percpu *ptr) list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) if (pos != chunk) { - schedule_work(&pcpu_balance_work); + pcpu_schedule_balance_work(); break; } } @@ -2187,3 +2284,15 @@ void __init percpu_init_late(void) spin_unlock_irqrestore(&pcpu_lock, flags); } } + +/* + * Percpu allocator is initialized early during boot when neither slab or + * workqueue is available. Plug async management until everything is up + * and running. + */ +static int __init percpu_enable_async(void) +{ + pcpu_async_enabled = true; + return 0; +} +subsys_initcall(percpu_enable_async); -- cgit v1.2.3 From 76ba59f8366f2d9282cb5bda9de75b4b68cbe55f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Aug 2014 11:03:16 +0100 Subject: genirq: Add irq_domain-aware core IRQ handler Calling irq_find_mapping from outside a irq_{enter,exit} section is unsafe and produces ugly messages if CONFIG_PROVE_RCU is enabled: If coming from the idle state, the rcu_read_lock call in irq_find_mapping will generate an unpleasant warning: =============================== [ INFO: suspicious RCU usage. ] 3.16.0-rc1+ #135 Not tainted ------------------------------- include/linux/rcupdate.h:871 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (rcu_read_lock){......}, at: [] irq_find_mapping+0x4c/0x198 As this issue is fairly widespread and involves at least three different architectures, a possible solution is to add a new handle_domain_irq entry point into the generic IRQ code that the interrupt controller code can call. This new function takes an irq_domain, and calls into irq_find_domain inside the irq_{enter,exit} block. An additional "lookup" parameter is used to allow non-domain architecture code to be replaced by this as well. Interrupt controllers can then be updated to use the new mechanism. 
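As a sketch of such an update, assuming a made-up controller with one hardware status register (only handle_domain_irq() itself comes from this patch; the chip names, register offset, and the ARM-style __exception_irq_entry annotation are illustrative):

    #include <linux/io.h>
    #include <linux/irqdesc.h>
    #include <asm/exception.h>

    #define MY_CHIP_IRQ_STATUS	0x10	/* hypothetical register */

    static void __iomem *my_chip_base;
    static struct irq_domain *my_chip_domain;

    /* Root handler: read the hwirq number and let the core do the
     * domain lookup inside a proper irq_enter()/irq_exit() section.
     */
    static void __exception_irq_entry my_chip_handle_irq(struct pt_regs *regs)
    {
            u32 hwirq = readl_relaxed(my_chip_base + MY_CHIP_IRQ_STATUS);

            handle_domain_irq(my_chip_domain, hwirq, regs);
    }

Compared with the old open-coded irq_find_mapping() plus generic_handle_irq() sequence, the lookup now runs after irq_enter(), so the rcu_read_lock() in the mapping code no longer trips the idle-state warning quoted above.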
This code is sitting behind a new CONFIG_HANDLE_DOMAIN_IRQ, as not all architectures implement set_irq_regs (yes, mn10300, I'm looking at you...). Reported-by: Vladimir Murzin Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1409047421-27649-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqdesc.h | 19 +++++++++++++++++++ kernel/irq/Kconfig | 3 +++ kernel/irq/irqdesc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..ff24667cd86c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -12,6 +12,8 @@ struct irq_affinity_notify; struct proc_dir_entry; struct module; struct irq_desc; +struct irq_domain; +struct pt_regs; /** * struct irq_desc - interrupt descriptor @@ -118,6 +120,23 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de int generic_handle_irq(unsigned int irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/* + * Convert a HW interrupt number to a logical one using a IRQ domain, + * and handle the result interrupt number. Return -EINVAL if + * conversion failed. Providing a NULL domain indicates that the + * conversion has already been done. + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs); + +static inline int handle_domain_irq(struct irq_domain *domain, + unsigned int hwirq, struct pt_regs *regs) +{ + return __handle_domain_irq(domain, hwirq, true, regs); +} +#endif + /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) { diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d269cecdfbf0..225086b2652e 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -55,6 +55,9 @@ config GENERIC_IRQ_CHIP config IRQ_DOMAIN bool +config HANDLE_DOMAIN_IRQ + bool + config IRQ_DOMAIN_DEBUG bool "Expose hardware/virtual IRQ mapping via debugfs" depends on IRQ_DOMAIN && DEBUG_FS diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1487a123db5c..a1782f88f0af 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internals.h" @@ -336,6 +337,47 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/** + * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @lookup: Whether to perform the domain lookup or not + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq = hwirq; + int ret = 0; + + irq_enter(); + +#ifdef CONFIG_IRQ_DOMAIN + if (lookup) + irq = irq_find_mapping(domain, hwirq); +#endif + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. 
+ */ + if (unlikely(!irq || irq >= nr_irqs)) { + ack_bad_irq(irq); + ret = -EINVAL; + } else { + generic_handle_irq(irq); + } + + irq_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif + /* Dynamic interrupt handling */ /** -- cgit v1.2.3 From a4412fc9486ec85686c6c7929e7e829f62ae377e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:14 -0700 Subject: seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing The secure_computing function took a syscall number parameter, but it only paid any attention to that parameter if seccomp mode 1 was enabled. Rather than coming up with a kludge to get the parameter to work in mode 2, just remove the parameter. To avoid churn in arches that don't have seccomp filters (and may not even support syscall_get_nr right now), this leaves the parameter in secure_computing_strict, which is now a real function. For ARM, this is a bit ugly due to the fact that ARM conditionally supports seccomp filters. Fixing that would probably only be a couple of lines of code, but it should be coordinated with the audit maintainers. This will be a slight slowdown on some arches. The right fix is to pass in all of seccomp_data instead of trying to make just the syscall nr part be fast. This is a prerequisite for making two-phase seccomp work cleanly. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: x86@kernel.org Cc: Kees Cook Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- arch/arm/kernel/ptrace.c | 7 ++++- arch/mips/kernel/ptrace.c | 2 +- arch/s390/kernel/ptrace.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- include/linux/seccomp.h | 21 +++++++------- kernel/seccomp.c | 64 ++++++++++++++++++++++++++++++------------- 7 files changed, 66 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 0c27ed6f3f23..5e772a21ab97 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) current_thread_info()->syscall = scno; /* Do the secure computing check first; failures should be fast. */ - if (secure_computing(scno) == -1) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + if (secure_computing() == -1) return -1; +#else + /* XXX: remove this once OABI gets fixed */ + secure_computing_strict(scno); +#endif if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 645b3c4fcfba..f7aac5b57b4b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); - if (secure_computing(syscall) == -1) + if (secure_computing() == -1) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE) && diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9e2fbf..bebacad48305 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -803,7 +803,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. 
*/ ret = -1; goto out; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..93c182a00506 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1471,7 +1471,7 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1L; goto out; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 5d586a45a319..aa3c040230be 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,19 +27,17 @@ struct seccomp { struct seccomp_filter *filter; }; -extern int __secure_computing(int); -static inline int secure_computing(int this_syscall) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +extern int __secure_computing(void); +static inline int secure_computing(void) { if (unlikely(test_thread_flag(TIF_SECCOMP))) - return __secure_computing(this_syscall); + return __secure_computing(); return 0; } - -/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */ -static inline void secure_computing_strict(int this_syscall) -{ - BUG_ON(secure_computing(this_syscall) != 0); -} +#else +extern void secure_computing_strict(int this_syscall); +#endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); @@ -56,8 +54,11 @@ static inline int seccomp_mode(struct seccomp *s) struct seccomp { }; struct seccomp_filter { }; -static inline int secure_computing(int this_syscall) { return 0; } +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +static inline int secure_computing(void) { return 0; } +#else static inline void secure_computing_strict(int this_syscall) { return; } +#endif static inline long prctl_get_seccomp(void) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..5e738e0dd2e9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -23,8 +23,11 @@ /* #define SECCOMP_DEBUG 1 */ -#ifdef CONFIG_SECCOMP_FILTER +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include +#endif + +#ifdef CONFIG_SECCOMP_FILTER #include #include #include @@ -172,7 +175,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. 
*/ -static u32 seccomp_run_filters(int syscall) +static u32 seccomp_run_filters(void) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); struct seccomp_data sd; @@ -564,10 +567,43 @@ static int mode1_syscalls_32[] = { }; #endif -int __secure_computing(int this_syscall) +static void __secure_computing_strict(int this_syscall) +{ + int *syscall_whitelist = mode1_syscalls; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + syscall_whitelist = mode1_syscalls_32; +#endif + do { + if (*syscall_whitelist == this_syscall) + return; + } while (*++syscall_whitelist); + +#ifdef SECCOMP_DEBUG + dump_stack(); +#endif + audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + do_exit(SIGKILL); +} + +#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER +void secure_computing_strict(int this_syscall) +{ + int mode = current->seccomp.mode; + + if (mode == 0) + return; + else if (mode == SECCOMP_MODE_STRICT) + __secure_computing_strict(this_syscall); + else + BUG(); +} +#else +int __secure_computing(void) { + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); int exit_sig = 0; - int *syscall; u32 ret; /* @@ -578,23 +614,12 @@ int __secure_computing(int this_syscall) switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: - syscall = mode1_syscalls; -#ifdef CONFIG_COMPAT - if (is_compat_task()) - syscall = mode1_syscalls_32; -#endif - do { - if (*syscall == this_syscall) - return 0; - } while (*++syscall); - exit_sig = SIGKILL; - ret = SECCOMP_RET_KILL; - break; + __secure_computing_strict(this_syscall); + return 0; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: { int data; - struct pt_regs *regs = task_pt_regs(current); - ret = seccomp_run_filters(this_syscall); + ret = seccomp_run_filters(); data = ret & SECCOMP_RET_DATA; ret &= SECCOMP_RET_ACTION; switch (ret) { @@ -652,9 +677,10 @@ int __secure_computing(int this_syscall) #ifdef CONFIG_SECCOMP_FILTER skip: audit_seccomp(this_syscall, exit_sig, ret); -#endif return -1; +#endif } +#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ long prctl_get_seccomp(void) { -- cgit v1.2.3 From 13aa72f0fd0a9f98a41cefb662487269e2f1ad65 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:15 -0700 Subject: seccomp: Refactor the filter callback and the API The reason I did this is to add a seccomp API that will be usable for an x86 fast path. The x86 entry code needs to use a rather expensive slow path for a syscall that might be visible to things like ptrace. By splitting seccomp into two phases, we can check whether we need the slow path and then use the fast path in if the filter allows the syscall or just returns some errno. As a side effect, I think the new code is much easier to understand than the old code. This has one user-visible effect: the audit record written for SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE happened. It used to depend in a complicated way on what the tracer did. I couldn't make much sense of it. 
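The resulting contract is easiest to see from the caller's side. A sketch of what an arch fast path does with the two phases -- it mirrors the new __secure_computing() in the diff below, with only the wrapper name invented (a later patch in this series adds a seccomp_data argument to seccomp_phase1()):

    #include <linux/seccomp.h>

    /* Phase 1 is cheap and reads pt_regs only through the
     * syscall_xyz helpers; phase 2 is needed only for
     * SECCOMP_RET_TRACE, where ptrace hooks must be safe to run.
     */
    static long my_arch_seccomp_check(void)
    {
            u32 phase1 = seccomp_phase1();

            if (likely(phase1 == SECCOMP_PHASE1_OK))
                    return 0;	/* run the syscall normally */
            if (likely(phase1 == SECCOMP_PHASE1_SKIP))
                    return -1;	/* return value was set in phase 1 */

            return seccomp_phase2(phase1);	/* slow path: tracer work */
    }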
Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 6 ++ kernel/seccomp.c | 190 +++++++++++++++++++++++++++++++----------------- 2 files changed, 130 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index aa3c040230be..38851085e481 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -35,6 +35,12 @@ static inline int secure_computing(void) return __secure_computing(); return 0; } + +#define SECCOMP_PHASE1_OK 0 +#define SECCOMP_PHASE1_SKIP 1 + +extern u32 seccomp_phase1(void); +int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); #endif diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5e738e0dd2e9..6c8528ce9df9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -21,8 +21,6 @@ #include #include -/* #define SECCOMP_DEBUG 1 */ - #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include #endif @@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); - int exit_sig = 0; - u32 ret; + u32 phase1_result = seccomp_phase1(); + + if (likely(phase1_result == SECCOMP_PHASE1_OK)) + return 0; + else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) + return -1; + else + return seccomp_phase2(phase1_result); +} + +#ifdef CONFIG_SECCOMP_FILTER +static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +{ + u32 filter_ret, action; + int data; /* * Make sure that any changes to mode from another thread have @@ -612,73 +621,122 @@ int __secure_computing(void) */ rmb(); - switch (current->seccomp.mode) { + filter_ret = seccomp_run_filters(); + data = filter_ret & SECCOMP_RET_DATA; + action = filter_ret & SECCOMP_RET_ACTION; + + switch (action) { + case SECCOMP_RET_ERRNO: + /* Set the low-order 16-bits as a errno. */ + syscall_set_return_value(current, regs, + -data, 0); + goto skip; + + case SECCOMP_RET_TRAP: + /* Show the handler the original registers. */ + syscall_rollback(current, regs); + /* Let the filter pass back 16 bits of data. */ + seccomp_send_sigsys(this_syscall, data); + goto skip; + + case SECCOMP_RET_TRACE: + return filter_ret; /* Save the rest for phase 2. */ + + case SECCOMP_RET_ALLOW: + return SECCOMP_PHASE1_OK; + + case SECCOMP_RET_KILL: + default: + audit_seccomp(this_syscall, SIGSYS, action); + do_exit(SIGSYS); + } + + unreachable(); + +skip: + audit_seccomp(this_syscall, 0, action); + return SECCOMP_PHASE1_SKIP; +} +#endif + +/** + * seccomp_phase1() - run fast path seccomp checks on the current syscall + * + * This only reads pt_regs via the syscall_xyz helpers. The only change + * it will make to pt_regs is via syscall_set_return_value, and it will + * only do that if it returns SECCOMP_PHASE1_SKIP. + * + * It may also call do_exit or force a signal; these actions must be + * safe. + * + * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should + * be processed normally. + * + * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be + * invoked. In this case, seccomp_phase1 will have set the return value + * using syscall_set_return_value. + * + * If it returns anything else, then the return value should be passed + * to seccomp_phase2 from a context in which ptrace hooks are safe. 
+ */ +u32 seccomp_phase1(void) +{ + int mode = current->seccomp.mode; + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); + + switch (mode) { case SECCOMP_MODE_STRICT: - __secure_computing_strict(this_syscall); - return 0; + __secure_computing_strict(this_syscall); /* may call do_exit */ + return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: { - int data; - ret = seccomp_run_filters(); - data = ret & SECCOMP_RET_DATA; - ret &= SECCOMP_RET_ACTION; - switch (ret) { - case SECCOMP_RET_ERRNO: - /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, - -data, 0); - goto skip; - case SECCOMP_RET_TRAP: - /* Show the handler the original registers. */ - syscall_rollback(current, regs); - /* Let the filter pass back 16 bits of data. */ - seccomp_send_sigsys(this_syscall, data); - goto skip; - case SECCOMP_RET_TRACE: - /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { - syscall_set_return_value(current, regs, - -ENOSYS, 0); - goto skip; - } - /* Allow the BPF to provide the event message */ - ptrace_event(PTRACE_EVENT_SECCOMP, data); - /* - * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. - */ - if (fatal_signal_pending(current)) - break; - if (syscall_get_nr(current, regs) < 0) - goto skip; /* Explicit request to skip. */ - - return 0; - case SECCOMP_RET_ALLOW: - return 0; - case SECCOMP_RET_KILL: - default: - break; - } - exit_sig = SIGSYS; - break; - } + case SECCOMP_MODE_FILTER: + return __seccomp_phase1_filter(this_syscall, regs); #endif default: BUG(); } +} -#ifdef SECCOMP_DEBUG - dump_stack(); -#endif - audit_seccomp(this_syscall, exit_sig, ret); - do_exit(exit_sig); -#ifdef CONFIG_SECCOMP_FILTER -skip: - audit_seccomp(this_syscall, exit_sig, ret); - return -1; -#endif +/** + * seccomp_phase2() - finish slow path seccomp work for the current syscall + * @phase1_result: The return value from seccomp_phase1() + * + * This must be called from a context in which ptrace hooks can be used. + * + * Returns 0 if the syscall should be processed or -1 to skip the syscall. + */ +int seccomp_phase2(u32 phase1_result) +{ + struct pt_regs *regs = task_pt_regs(current); + u32 action = phase1_result & SECCOMP_RET_ACTION; + int data = phase1_result & SECCOMP_RET_DATA; + + BUG_ON(action != SECCOMP_RET_TRACE); + + audit_seccomp(syscall_get_nr(current, regs), 0, action); + + /* Skip these calls if there is no tracer. */ + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); + return -1; + } + + /* Allow the BPF to provide the event message */ + ptrace_event(PTRACE_EVENT_SECCOMP, data); + /* + * The delivery of a fatal signal during event + * notification may silently skip tracer notification. + * Terminating the task now avoids executing a system + * call that may not be intended. + */ + if (fatal_signal_pending(current)) + do_exit(SIGSYS); + if (syscall_get_nr(current, regs) < 0) + return -1; /* Explicit request to skip. 
*/ + + return 0; } #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ -- cgit v1.2.3 From d39bd00deabe57420f2a3669eb71b0e0c4997184 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:16 -0700 Subject: seccomp: Allow arch code to provide seccomp_data populate_seccomp_data is expensive: it works by inspecting task_pt_regs and various other bits to piece together all the information, and it does so in multiple partially redundant steps. Arch-specific code in the syscall entry path can do much better. Admittedly this adds a bit of additional room for error, but the speedup should be worth it. Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 38851085e481..a19ddacdac30 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -39,7 +39,7 @@ static inline int secure_computing(void) #define SECCOMP_PHASE1_OK 0 #define SECCOMP_PHASE1_SKIP 1 -extern u32 seccomp_phase1(void); +extern u32 seccomp_phase1(struct seccomp_data *sd); int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 6c8528ce9df9..1285cb205d49 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -173,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(void) +static u32 seccomp_run_filters(struct seccomp_data *sd) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); - struct seccomp_data sd; + struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ @@ -186,14 +186,17 @@ static u32 seccomp_run_filters(void) /* Make sure cross-thread synced filter points somewhere sane. */ smp_read_barrier_depends(); - populate_seccomp_data(&sd); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (; f; f = f->prev) { - u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -599,7 +602,7 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - u32 phase1_result = seccomp_phase1(); + u32 phase1_result = seccomp_phase1(NULL); if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; @@ -610,7 +613,7 @@ int __secure_computing(void) } #ifdef CONFIG_SECCOMP_FILTER -static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) { u32 filter_ret, action; int data; @@ -621,20 +624,20 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) */ rmb(); - filter_ret = seccomp_run_filters(); + filter_ret = seccomp_run_filters(sd); data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION; switch (action) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, + syscall_set_return_value(current, task_pt_regs(current), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers.
*/ - syscall_rollback(current, regs); + syscall_rollback(current, task_pt_regs(current)); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -661,11 +664,14 @@ skip: /** * seccomp_phase1() - run fast path seccomp checks on the current syscall + * @arg sd: The seccomp_data or NULL * * This only reads pt_regs via the syscall_xyz helpers. The only change * it will make to pt_regs is via syscall_set_return_value, and it will * only do that if it returns SECCOMP_PHASE1_SKIP. * + * If sd is provided, it will not read pt_regs at all. + * * It may also call do_exit or force a signal; these actions must be * safe. * @@ -679,11 +685,11 @@ skip: * If it returns anything else, then the return value should be passed * to seccomp_phase2 from a context in which ptrace hooks are safe. */ -u32 seccomp_phase1(void) +u32 seccomp_phase1(struct seccomp_data *sd) { int mode = current->seccomp.mode; - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); + int this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); switch (mode) { case SECCOMP_MODE_STRICT: @@ -691,7 +697,7 @@ u32 seccomp_phase1(void) return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - return __seccomp_phase1_filter(this_syscall, regs); + return __seccomp_phase1_filter(this_syscall, sd); #endif default: BUG(); -- cgit v1.2.3 From 26ae4980b5e4739af93543a147facb421fb78ae8 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Fri, 15 Aug 2014 16:07:48 -0500 Subject: fsl_ifc: Fix csor_ext position in fsl_ifc_regs According to Freescale manuals, the IFC_CSORn_EXT register is located immediately _after_ the bank's IFC_CSORn register. This patch adjusts the csor_ext member of, and the reserved register arrays immediately surrounding, the csor_cs structure to provide proper access to this register. Signed-off-by: Aaron Sierra Acked-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- include/linux/fsl_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index f49ddb1b2273..84d60cb841b1 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -781,13 +781,13 @@ struct fsl_ifc_regs { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x17]; + u32 res5[0x18]; struct { - __be32 csor_ext; __be32 csor; + __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x19]; + u32 res7[0x18]; struct { __be32 ftim[4]; u32 res8[0x8]; -- cgit v1.2.3 From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 3 Sep 2014 09:22:36 +0800 Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking Although the rhashtable library allows the user to specify a quite big size for the created hash table, the table may be shrunk to a very small size - HASH_MIN_SIZE(4) - the first time an object is removed from it. Subsequently, even if the total number of objects stored in the table stays well below the user's initial setting for a long time, the hash table size is still dynamically adjusted by rhashtable_shrink() or rhashtable_expand() each time an object is inserted into or removed from the table.
However, as synchronize_rcu() has to be called whenever the table is shrunk or expanded by these two functions, the user should be permitted to set a minimum table size, by configuring a minimum number of shifts to match their specific requirements, thereby avoiding these expensive shrink and expand operations and their synchronize_rcu() calls. Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ lib/rhashtable.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 36826c0166c5..fb298e9d6d3a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -44,6 +44,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding + * @min_shift: Minimum number of shifts while shrinking * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand @@ -57,6 +58,7 @@ struct rhashtable_params { size_t head_offset; u32 hash_rnd; size_t max_shift; + size_t min_shift; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..8dfec3f26d4c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) ASSERT_RHT_MUTEX(ht); - if (tbl->size <= HASH_MIN_SIZE) + if (ht->shift <= ht->p.min_shift) return 0; ntbl = bucket_table_alloc(tbl->size / 2, flags); @@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); -static size_t rounded_hashtable_size(unsigned int nelem) +static size_t rounded_hashtable_size(struct rhashtable_params *params) { - return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); + return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + 1UL << params->min_shift); } /** @@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, + ilog2(HASH_MIN_SIZE)); + if (params->nelem_hint) - size = rounded_hashtable_size(params->nelem_hint); + size = rounded_hashtable_size(params); tbl = bucket_table_alloc(size, GFP_KERNEL); if (tbl == NULL) -- cgit v1.2.3 From 03e9f0cac5da6af85758276cb4624caf5911f2b9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 3 Sep 2014 13:02:56 +0200 Subject: pinctrl: clean up after enable refactoring commit 2243a87d90b42eb38bc281957df3e57c712b5e56 "pinctrl: avoid duplicated calling enable_pinmux_setting for a pin" removed the .disable callback from the struct pinmux_ops, making the .enable() callback the only remaining callback. However, .enable() is a bad name, as it seems to imply that a muxing can also be disabled. Rename the callback to .set_mux() and also take this opportunity to clean out any remaining mentions of .disable() from the documentation.
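Before the long run of per-driver diffs, a minimal sketch of what a converted driver looks like. This is a hypothetical "foo" controller written for illustration only (the foo_* helpers and the FOO_MUX() register macro are invented); it shows that the callback body and signature are unchanged and only the operation name moves:

static int foo_set_mux(struct pinctrl_dev *pctldev, unsigned func_selector,
		       unsigned group_selector)
{
	struct foo_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);

	/* Program the selected function onto the selected pin group. */
	writel(func_selector, pmx->base + FOO_MUX(group_selector));
	return 0;
}

static const struct pinmux_ops foo_pmx_ops = {
	.get_functions_count	= foo_get_functions_count,
	.get_function_name	= foo_get_function_name,
	.get_function_groups	= foo_get_function_groups,
	.set_mux		= foo_set_mux,	/* formerly .enable */
};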
Acked-by: Stephen Warren Acked-by: Bjorn Andersson Acked-by: Fan Wu Signed-off-by: Linus Walleij --- Documentation/pinctrl.txt | 14 ++------------ drivers/pinctrl/berlin/berlin.c | 8 ++++---- drivers/pinctrl/mvebu/pinctrl-mvebu.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-abx500.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 6 +++--- drivers/pinctrl/pinctrl-adi2.c | 6 +++--- drivers/pinctrl/pinctrl-as3722.c | 4 ++-- drivers/pinctrl/pinctrl-at91.c | 6 +++--- drivers/pinctrl/pinctrl-bcm281xx.c | 8 ++++---- drivers/pinctrl/pinctrl-bcm2835.c | 4 ++-- drivers/pinctrl/pinctrl-imx.c | 6 +++--- drivers/pinctrl/pinctrl-imx1-core.c | 6 +++--- drivers/pinctrl/pinctrl-lantiq.c | 8 ++++---- drivers/pinctrl/pinctrl-mxs.c | 6 +++--- drivers/pinctrl/pinctrl-palmas.c | 5 +++-- drivers/pinctrl/pinctrl-rockchip.c | 6 +++--- drivers/pinctrl/pinctrl-single.c | 4 ++-- drivers/pinctrl/pinctrl-st.c | 6 +++--- drivers/pinctrl/pinctrl-tb10x.c | 4 ++-- drivers/pinctrl/pinctrl-tegra-xusb.c | 8 ++++---- drivers/pinctrl/pinctrl-tegra.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090-pdc.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090.c | 6 +++--- drivers/pinctrl/pinctrl-u300.c | 6 +++--- drivers/pinctrl/pinmux.c | 10 +++++----- drivers/pinctrl/qcom/pinctrl-msm.c | 8 ++++---- drivers/pinctrl/samsung/pinctrl-exynos5440.c | 7 ++++--- drivers/pinctrl/samsung/pinctrl-samsung.c | 7 ++++--- drivers/pinctrl/sh-pfc/pinctrl.c | 6 +++--- drivers/pinctrl/sirf/pinctrl-sirf.c | 7 ++++--- drivers/pinctrl/spear/pinctrl-spear.c | 4 ++-- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 ++++---- drivers/pinctrl/vt8500/pinctrl-wmt.c | 8 ++++---- include/linux/pinctrl/pinmux.h | 7 +++---- 34 files changed, 110 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 23f1590f49fe..b8f2147b96dd 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -702,7 +702,7 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, +int foo_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { u8 regbit = (1 << selector + group); @@ -711,21 +711,11 @@ int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -void foo_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - u8 regbit = (1 << selector + group); - - writeb((readb(MUX) & ~(regbit)), MUX) - return 0; -} - struct pinmux_ops foo_pmxops = { .get_functions_count = foo_get_functions_count, .get_function_name = foo_get_fname, .get_function_groups = foo_get_groups, - .enable = foo_enable, - .disable = foo_disable, + .set_mux = foo_set_mux, }; /* Pinmux operations are handled by some pin controller */ diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c index 86db2235ab00..61f1bf0e60ba 100644 --- a/drivers/pinctrl/berlin/berlin.c +++ b/drivers/pinctrl/berlin/berlin.c @@ -170,9 +170,9 @@ berlin_pinctrl_find_function_by_name(struct berlin_pinctrl *pctrl, return NULL; } -static int berlin_pinmux_enable(struct pinctrl_dev *pctrl_dev, - unsigned function, - unsigned group) +static int berlin_pinmux_set(struct pinctrl_dev *pctrl_dev, + unsigned function, + unsigned group) { struct berlin_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctrl_dev); const struct berlin_desc_group *group_desc = pctrl->desc->groups + group; @@ -197,7 +197,7 @@ static const struct pinmux_ops berlin_pinmux_ops = { .get_functions_count = 
&berlin_pinmux_get_functions_count, .get_function_name = &berlin_pinmux_get_function_name, .get_function_groups = &berlin_pinmux_get_function_groups, - .enable = &berlin_pinmux_enable, + .set_mux = &berlin_pinmux_set, }; static int berlin_pinctrl_add_function(struct berlin_pinctrl *pctrl, diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index 9908374f8f92..f3b426cdaf8f 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -259,8 +259,8 @@ static int mvebu_pinmux_get_groups(struct pinctrl_dev *pctldev, unsigned fid, return 0; } -static int mvebu_pinmux_enable(struct pinctrl_dev *pctldev, unsigned fid, - unsigned gid) +static int mvebu_pinmux_set(struct pinctrl_dev *pctldev, unsigned fid, + unsigned gid) { struct mvebu_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct mvebu_pinctrl_function *func = &pctl->functions[fid]; @@ -344,7 +344,7 @@ static const struct pinmux_ops mvebu_pinmux_ops = { .get_function_groups = mvebu_pinmux_get_groups, .gpio_request_enable = mvebu_pinmux_gpio_request_enable, .gpio_set_direction = mvebu_pinmux_gpio_set_direction, - .enable = mvebu_pinmux_enable, + .set_mux = mvebu_pinmux_set, }; static int mvebu_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index a53a689a2bfa..b0289824cf73 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -709,8 +709,8 @@ static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int abx500_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev); struct gpio_chip *chip = &pct->chip; @@ -784,7 +784,7 @@ static const struct pinmux_ops abx500_pinmux_ops = { .get_functions_count = abx500_pmx_get_funcs_cnt, .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, - .enable = abx500_pmx_enable, + .set_mux = abx500_pmx_set, .gpio_request_enable = abx500_gpio_request_enable, .gpio_disable_free = abx500_gpio_disable_free, }; diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index e7cab07eef47..c093d75a022a 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1647,8 +1647,8 @@ static int nmk_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int nmk_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev); const struct nmk_pingroup *g; @@ -1810,7 +1810,7 @@ static const struct pinmux_ops nmk_pinmux_ops = { .get_functions_count = nmk_pmx_get_funcs_cnt, .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, - .enable = nmk_pmx_enable, + .set_mux = nmk_pmx_set, .gpio_request_enable = nmk_gpio_request_enable, .gpio_disable_free = nmk_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index b092b93c67a1..e02943b0285d 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -619,8 +619,8 @@ static struct pinctrl_ops adi_pctrl_ops = { .get_group_pins = 
adi_get_group_pins, }; -static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id, - unsigned group_id) +static int adi_pinmux_set(struct pinctrl_dev *pctldev, unsigned func_id, + unsigned group_id) { struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); struct gpio_port *port; @@ -698,7 +698,7 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev, } static struct pinmux_ops adi_pinmux_ops = { - .enable = adi_pinmux_enable, + .set_mux = adi_pinmux_set, .get_functions_count = adi_pinmux_get_funcs_count, .get_function_name = adi_pinmux_get_func_name, .get_function_groups = adi_pinmux_get_groups, diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 0e4ec91f4d49..1f790a4b83fe 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -230,7 +230,7 @@ static int as3722_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int as3722_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int as3722_pinctrl_set(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { struct as3722_pctrl_info *as_pci = pinctrl_dev_get_drvdata(pctldev); @@ -327,7 +327,7 @@ static const struct pinmux_ops as3722_pinmux_ops = { .get_functions_count = as3722_pinctrl_get_funcs_count, .get_function_name = as3722_pinctrl_get_func_name, .get_function_groups = as3722_pinctrl_get_func_groups, - .enable = as3722_pinctrl_enable, + .set_mux = as3722_pinctrl_set, .gpio_request_enable = as3722_pinctrl_gpio_request_enable, .gpio_set_direction = as3722_pinctrl_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index af1ba4fc150d..4839c36a3a1c 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -554,8 +554,8 @@ static void at91_mux_gpio_enable(void __iomem *pio, unsigned mask, bool input) writel_relaxed(mask, pio + (input ? 
PIO_ODR : PIO_OER)); } -static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf; @@ -684,7 +684,7 @@ static const struct pinmux_ops at91_pmx_ops = { .get_functions_count = at91_pmx_get_funcs_count, .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, - .enable = at91_pmx_enable, + .set_mux = at91_pmx_set, .gpio_request_enable = at91_gpio_request_enable, .gpio_disable_free = at91_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-bcm281xx.c b/drivers/pinctrl/pinctrl-bcm281xx.c index c5ca9e633fff..a26e0c2ba33e 100644 --- a/drivers/pinctrl/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/pinctrl-bcm281xx.c @@ -1055,9 +1055,9 @@ static int bcm281xx_pinctrl_get_fcn_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm281xx_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int bcm281xx_pinmux_set(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct bcm281xx_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev); const struct bcm281xx_pin_function *f = &pdata->functions[function]; @@ -1084,7 +1084,7 @@ static struct pinmux_ops bcm281xx_pinctrl_pinmux_ops = { .get_functions_count = bcm281xx_pinctrl_get_fcns_count, .get_function_name = bcm281xx_pinctrl_get_fcn_name, .get_function_groups = bcm281xx_pinctrl_get_fcn_groups, - .enable = bcm281xx_pinmux_enable, + .set_mux = bcm281xx_pinmux_set, }; static int bcm281xx_pinctrl_pin_config_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 5bcfd7ace0cd..eabba02f71f9 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -830,7 +830,7 @@ static int bcm2835_pmx_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev, +static int bcm2835_pmx_set(struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector) { @@ -869,7 +869,7 @@ static const struct pinmux_ops bcm2835_pmx_ops = { .get_functions_count = bcm2835_pmx_get_functions_count, .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, - .enable = bcm2835_pmx_enable, + .set_mux = bcm2835_pmx_set, .gpio_disable_free = bcm2835_pmx_gpio_disable_free, .gpio_set_direction = bcm2835_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 946d594a64dd..570e5acb4a6a 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -179,8 +179,8 @@ static const struct pinctrl_ops imx_pctrl_ops = { }; -static int imx_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx_pinctrl_soc_info *info = ipctl->info; @@ -298,7 +298,7 @@ static const struct pinmux_ops imx_pmx_ops = { .get_functions_count = imx_pmx_get_funcs_count, .get_function_name = imx_pmx_get_func_name, .get_function_groups = imx_pmx_get_groups, - .enable = imx_pmx_enable, + .set_mux = imx_pmx_set, }; static int imx_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-imx1-core.c 
b/drivers/pinctrl/pinctrl-imx1-core.c index 483420757c9f..176a3e62f1cf 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -298,8 +298,8 @@ static const struct pinctrl_ops imx1_pctrl_ops = { }; -static int imx1_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx1_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx1_pinctrl_soc_info *info = ipctl->info; @@ -385,7 +385,7 @@ static const struct pinmux_ops imx1_pmx_ops = { .get_functions_count = imx1_pmx_get_funcs_count, .get_function_name = imx1_pmx_get_func_name, .get_function_groups = imx1_pmx_get_groups, - .enable = imx1_pmx_enable, + .set_mux = imx1_pmx_set, }; static int imx1_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c index d22ca252b80d..296e5b37f768 100644 --- a/drivers/pinctrl/pinctrl-lantiq.c +++ b/drivers/pinctrl/pinctrl-lantiq.c @@ -257,9 +257,9 @@ static int match_group_mux(const struct ltq_pin_group *grp, return ret; } -static int ltq_pmx_enable(struct pinctrl_dev *pctrldev, - unsigned func, - unsigned group) +static int ltq_pmx_set(struct pinctrl_dev *pctrldev, + unsigned func, + unsigned group) { struct ltq_pinmux_info *info = pinctrl_dev_get_drvdata(pctrldev); const struct ltq_pin_group *pin_grp = &info->grps[group]; @@ -316,7 +316,7 @@ static const struct pinmux_ops ltq_pmx_ops = { .get_functions_count = ltq_pmx_func_count, .get_function_name = ltq_pmx_func_name, .get_function_groups = ltq_pmx_get_groups, - .enable = ltq_pmx_enable, + .set_mux = ltq_pmx_set, .gpio_request_enable = ltq_pmx_gpio_request_enable, }; diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 40c76f26998c..67035091f8fd 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -195,8 +195,8 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int mxs_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct mxs_pinctrl_data *d = pinctrl_dev_get_drvdata(pctldev); struct mxs_group *g = &d->soc->groups[group]; @@ -223,7 +223,7 @@ static const struct pinmux_ops mxs_pinmux_ops = { .get_functions_count = mxs_pinctrl_get_funcs_count, .get_function_name = mxs_pinctrl_get_func_name, .get_function_groups = mxs_pinctrl_get_func_groups, - .enable = mxs_pinctrl_enable, + .set_mux = mxs_pinctrl_set_mux, }; static int mxs_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c index f13d0e78a41c..e3079d3d19fe 100644 --- a/drivers/pinctrl/pinctrl-palmas.c +++ b/drivers/pinctrl/pinctrl-palmas.c @@ -685,7 +685,8 @@ static int palmas_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int palmas_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int palmas_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, unsigned group) { struct palmas_pctrl_chip_info *pci = pinctrl_dev_get_drvdata(pctldev); @@ -742,7 +743,7 @@ static const struct pinmux_ops palmas_pinmux_ops = { .get_functions_count = palmas_pinctrl_get_funcs_count, .get_function_name = palmas_pinctrl_get_func_name, .get_function_groups = palmas_pinctrl_get_func_groups, - .enable = palmas_pinctrl_enable, + .set_mux = 
palmas_pinctrl_set_mux, }; static int palmas_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 5e8b2e04cd7a..b91b966add8f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -813,8 +813,8 @@ static int rockchip_pmx_get_groups(struct pinctrl_dev *pctldev, return 0; } -static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const unsigned int *pins = info->groups[group].pins; @@ -889,7 +889,7 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_functions_count = rockchip_pmx_get_funcs_count, .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, - .enable = rockchip_pmx_enable, + .set_mux = rockchip_pmx_set, .gpio_set_direction = rockchip_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 784de13facf3..9032422ad18f 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -447,7 +447,7 @@ static int pcs_get_function(struct pinctrl_dev *pctldev, unsigned pin, return 0; } -static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector, +static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, unsigned group) { struct pcs_device *pcs; @@ -519,7 +519,7 @@ static const struct pinmux_ops pcs_pinmux_ops = { .get_functions_count = pcs_get_functions_count, .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, - .enable = pcs_enable, + .set_mux = pcs_set_mux, .gpio_request_enable = pcs_request_gpio, }; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 5475374d803f..6c4c41bed1e3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -914,8 +914,8 @@ static struct st_pio_control *st_get_pio_control( return &bank->pc; } -static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector, - unsigned group) +static int st_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, + unsigned group) { struct st_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); struct st_pinconf *conf = info->groups[group].pin_conf; @@ -951,7 +951,7 @@ static struct pinmux_ops st_pmxops = { .get_functions_count = st_pmx_get_funcs_count, .get_function_name = st_pmx_get_fname, .get_function_groups = st_pmx_get_groups, - .enable = st_pmx_enable, + .set_mux = st_pmx_set_mux, .gpio_set_direction = st_pmx_set_gpio_direction, }; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index 71c5d4f0c538..3b9bfcf717ac 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -697,7 +697,7 @@ static void tb10x_gpio_disable_free(struct pinctrl_dev *pctl, mutex_unlock(&state->mutex); } -static int tb10x_pctl_enable(struct pinctrl_dev *pctl, +static int tb10x_pctl_set_mux(struct pinctrl_dev *pctl, unsigned func_selector, unsigned group_selector) { struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl); @@ -744,7 +744,7 @@ static struct pinmux_ops tb10x_pinmux_ops = { .get_function_groups = tb10x_get_function_groups, .gpio_request_enable = tb10x_gpio_request_enable, .gpio_disable_free = tb10x_gpio_disable_free, - .enable = tb10x_pctl_enable, + .set_mux = tb10x_pctl_set_mux, }; static struct pinctrl_desc
tb10x_pindesc = { diff --git a/drivers/pinctrl/pinctrl-tegra-xusb.c b/drivers/pinctrl/pinctrl-tegra-xusb.c index a06620474845..4648c9e5c582 100644 --- a/drivers/pinctrl/pinctrl-tegra-xusb.c +++ b/drivers/pinctrl/pinctrl-tegra-xusb.c @@ -281,9 +281,9 @@ static int tegra_xusb_padctl_get_function_groups(struct pinctrl_dev *pinctrl, return 0; } -static int tegra_xusb_padctl_pinmux_enable(struct pinctrl_dev *pinctrl, - unsigned int function, - unsigned int group) +static int tegra_xusb_padctl_pinmux_set(struct pinctrl_dev *pinctrl, + unsigned int function, + unsigned int group) { struct tegra_xusb_padctl *padctl = pinctrl_dev_get_drvdata(pinctrl); const struct tegra_xusb_padctl_lane *lane; @@ -311,7 +311,7 @@ static const struct pinmux_ops tegra_xusb_padctl_pinmux_ops = { .get_functions_count = tegra_xusb_padctl_get_functions_count, .get_function_name = tegra_xusb_padctl_get_function_name, .get_function_groups = tegra_xusb_padctl_get_function_groups, - .enable = tegra_xusb_padctl_pinmux_enable, + .set_mux = tegra_xusb_padctl_pinmux_set, }; static int tegra_xusb_padctl_pinconf_group_get(struct pinctrl_dev *pinctrl, diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index 150af5503c09..e5949d51bc52 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -262,8 +262,9 @@ static int tegra_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int tegra_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tegra_pingroup *g; @@ -294,7 +295,7 @@ static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, - .enable = tegra_pinctrl_enable, + .set_mux = tegra_pinctrl_set_mux, }; static int tegra_pinconf_reg(struct tegra_pmx *pmx, diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c index 41e81a35cabb..3bb6a3b78864 100644 --- a/drivers/pinctrl/pinctrl-tz1090-pdc.c +++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c @@ -547,8 +547,9 @@ static void tz1090_pdc_pinctrl_mux(struct tz1090_pdc_pmx *pmx, __global_unlock2(flags); } -static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pdc_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, + unsigned int group) { struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group]; @@ -634,7 +635,7 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = { .get_functions_count = tz1090_pdc_pinctrl_get_funcs_count, .get_function_name = tz1090_pdc_pinctrl_get_func_name, .get_function_groups = tz1090_pdc_pinctrl_get_func_groups, - .enable = tz1090_pdc_pinctrl_enable, + .set_mux = tz1090_pdc_pinctrl_set_mux, .gpio_request_enable = tz1090_pdc_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pdc_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c index 24082216842e..48d36413b99f 100644 --- a/drivers/pinctrl/pinctrl-tz1090.c +++ b/drivers/pinctrl/pinctrl-tz1090.c @@ -1415,8 +1415,8 @@ found_mux: * the effect is the same as enabling the function on each individual pin in the * group. 
*/ -static int tz1090_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, unsigned int group) { struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); struct tz1090_pingroup *grp; @@ -1517,7 +1517,7 @@ static struct pinmux_ops tz1090_pinmux_ops = { .get_functions_count = tz1090_pinctrl_get_funcs_count, .get_function_name = tz1090_pinctrl_get_func_name, .get_function_groups = tz1090_pinctrl_get_func_groups, - .enable = tz1090_pinctrl_enable, + .set_mux = tz1090_pinctrl_set_mux, .gpio_request_enable = tz1090_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 0959bb36450f..e9c7113d81f2 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -955,8 +955,8 @@ static void u300_pmx_endisable(struct u300_pmx *upmx, unsigned selector, } } -static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int u300_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct u300_pmx *upmx; @@ -994,7 +994,7 @@ static const struct pinmux_ops u300_pmx_ops = { .get_functions_count = u300_pmx_get_funcs_count, .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, - .enable = u300_pmx_enable, + .set_mux = u300_pmx_set_mux, }; static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin, diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index c055daf9a80f..b874458dcb88 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -41,7 +41,7 @@ int pinmux_check_ops(struct pinctrl_dev *pctldev) !ops->get_functions_count || !ops->get_function_name || !ops->get_function_groups || - !ops->enable) { + !ops->set_mux) { dev_err(pctldev->dev, "pinmux ops lacks necessary functions\n"); return -EINVAL; } @@ -445,15 +445,15 @@ int pinmux_enable_setting(struct pinctrl_setting const *setting) desc->mux_setting = &(setting->data.mux); } - ret = ops->enable(pctldev, setting->data.mux.func, - setting->data.mux.group); + ret = ops->set_mux(pctldev, setting->data.mux.func, + setting->data.mux.group); if (ret) - goto err_enable; + goto err_set_mux; return 0; -err_enable: +err_set_mux: for (i = 0; i < num_pins; i++) { desc = pin_desc_get(pctldev, pins[i]); if (desc) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 80a64cad907d..01eab47a746f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -134,9 +134,9 @@ static int msm_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int msm_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); const struct msm_pingroup *g; @@ -170,7 +170,7 @@ static const struct pinmux_ops msm_pinmux_ops = { .get_functions_count = msm_get_functions_count, .get_function_name = msm_get_function_name, .get_function_groups = msm_get_function_groups, - .enable = msm_pinmux_enable, + .set_mux = msm_pinmux_set_mux, }; static int msm_config_reg(struct msm_pinctrl *pctrl, diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c index 603da2f9dd95..b995ec2c5d16 100644 --- 
a/drivers/pinctrl/samsung/pinctrl-exynos5440.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c @@ -364,8 +364,9 @@ static void exynos5440_pinmux_setup(struct pinctrl_dev *pctldev, unsigned select } /* enable a specified pinmux by writing to registers */ -static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int exynos5440_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { exynos5440_pinmux_setup(pctldev, selector, group, true); return 0; @@ -387,7 +388,7 @@ static const struct pinmux_ops exynos5440_pinmux_ops = { .get_functions_count = exynos5440_get_functions_count, .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, - .enable = exynos5440_pinmux_enable, + .set_mux = exynos5440_pinmux_set_mux, .gpio_set_direction = exynos5440_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index b07406da333c..4a47691c32b1 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -401,8 +401,9 @@ static void samsung_pinmux_setup(struct pinctrl_dev *pctldev, unsigned selector, } /* enable a specified pinmux by writing to registers */ -static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int samsung_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { samsung_pinmux_setup(pctldev, selector, group, true); return 0; @@ -413,7 +414,7 @@ static const struct pinmux_ops samsung_pinmux_ops = { .get_functions_count = samsung_get_functions_count, .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, - .enable = samsung_pinmux_enable, + .set_mux = samsung_pinmux_set_mux, }; /* set or get the pin config settings for a specified pin */ diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index 11db3ee39d40..910deaefa0ac 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -312,8 +312,8 @@ static int sh_pfc_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int sh_pfc_func_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int sh_pfc_func_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev); struct sh_pfc *pfc = pmx->pfc; @@ -442,7 +442,7 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = { .get_functions_count = sh_pfc_get_functions_count, .get_function_name = sh_pfc_get_function_name, .get_function_groups = sh_pfc_get_function_groups, - .enable = sh_pfc_func_enable, + .set_mux = sh_pfc_func_set_mux, .gpio_request_enable = sh_pfc_gpio_request_enable, .gpio_disable_free = sh_pfc_gpio_disable_free, .gpio_set_direction = sh_pfc_gpio_set_direction, diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index 45f93f952a39..6f252fd7cb0c 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -179,8 +179,9 @@ static void sirfsoc_pinmux_endisable(struct sirfsoc_pmx *spmx, } } -static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector, - unsigned group) +static int sirfsoc_pinmux_set_mux(struct pinctrl_dev *pmxdev, + unsigned selector, + unsigned group) { struct sirfsoc_pmx *spmx; @@ -237,7 +238,7 @@ static int sirfsoc_pinmux_request_gpio(struct 
pinctrl_dev *pmxdev, } static struct pinmux_ops sirfsoc_pinmux_ops = { - .enable = sirfsoc_pinmux_enable, + .set_mux = sirfsoc_pinmux_set_mux, .get_functions_count = sirfsoc_pinmux_get_funcs_count, .get_function_name = sirfsoc_pinmux_get_func_name, .get_function_groups = sirfsoc_pinmux_get_groups, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index f72cc4e192bd..abdb05ac43dc 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -268,7 +268,7 @@ static int spear_pinctrl_endisable(struct pinctrl_dev *pctldev, return 0; } -static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int spear_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { return spear_pinctrl_endisable(pctldev, function, group, true); @@ -338,7 +338,7 @@ static const struct pinmux_ops spear_pinmux_ops = { .get_functions_count = spear_pinctrl_get_funcs_count, .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, - .enable = spear_pinctrl_enable, + .set_mux = spear_pinctrl_set_mux, .gpio_request_enable = gpio_request_enable, .gpio_disable_free = gpio_disable_free, }; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 3df66e366c87..ef9d804e55de 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -393,9 +393,9 @@ static void sunxi_pmx_set(struct pinctrl_dev *pctldev, spin_unlock_irqrestore(&pctl->lock, flags); } -static int sunxi_pmx_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int sunxi_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct sunxi_pinctrl_group *g = pctl->groups + group; @@ -441,7 +441,7 @@ static const struct pinmux_ops sunxi_pmx_ops = { .get_functions_count = sunxi_pmx_get_funcs_cnt, .get_function_name = sunxi_pmx_get_func_name, .get_function_groups = sunxi_pmx_get_func_groups, - .enable = sunxi_pmx_enable, + .set_mux = sunxi_pmx_set_mux, .gpio_set_direction = sunxi_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 8cea355f9a81..d055d63309e4 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -131,9 +131,9 @@ static int wmt_set_pinmux(struct wmt_pinctrl_data *data, unsigned func, return 0; } -static int wmt_pmx_enable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) +static int wmt_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned func_selector, + unsigned group_selector) { struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev); u32 pinnum = data->pins[group_selector].number; @@ -168,7 +168,7 @@ static struct pinmux_ops wmt_pinmux_ops = { .get_functions_count = wmt_pmx_get_functions_count, .get_function_name = wmt_pmx_get_function_name, .get_function_groups = wmt_pmx_get_function_groups, - .enable = wmt_pmx_enable, + .set_mux = wmt_pmx_set_mux, .gpio_disable_free = wmt_pmx_gpio_disable_free, .gpio_set_direction = wmt_pmx_gpio_set_direction, }; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index 3097aafbeb24..511bda9ed4bf 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -39,13 +39,12 @@ struct pinctrl_dev; * name can be used with the generic @pinctrl_ops to retrieve the 
* actual pins affected. The applicable groups will be returned in * @groups and the number of groups in @num_groups - * @enable: enable a certain muxing function with a certain pin group. The + * @set_mux: enable a certain muxing function with a certain pin group. The * driver does not need to figure out whether enabling this function * conflicts some other use of the pins in that group, such collisions * are handled by the pinmux subsystem. The @func_selector selects a * certain function whereas @group_selector selects a certain set of pins * to be used. On simple controllers the latter argument may be ignored - * @disable: disable a certain muxing selector with a certain pin group * @gpio_request_enable: requests and enables GPIO on a certain pin. * Implement this only if you can mux every pin individually as GPIO. The * affected GPIO range is passed along with an offset(pin number) into that @@ -68,8 +67,8 @@ struct pinmux_ops { unsigned selector, const char * const **groups, unsigned * const num_groups); - int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); + int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, + unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset); -- cgit v1.2.3 From 3af0dbd592fe0a92002f16e341519ba03e92adf7 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 1 Sep 2014 11:19:52 +0800 Subject: gpio: mcp23s08 to support both device tree and platform data Device tree is not enabled on some architectures where the mcp23s08 gpio driver is still required. v2-changes: - Parse device tree properties into platform data rather than individual variables. v3-changes: - Use the of_node in the gpio_chip device structure, because struct device * always has an of_node, which is NULL when OF is not used.
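For boards without device tree, the driver can now be configured entirely through platform data. A minimal board-file sketch for an I2C-attached MCP23017 follows; the GPIO base, I2C address and pullup mask are invented for illustration, while the field names match the mcp23s08_platform_data additions in this patch:

#include <linux/i2c.h>
#include <linux/spi/mcp23s08.h>

static struct mcp23s08_platform_data board_mcp23017_pdata = {
	.base		= 160,		/* first GPIO number to assign */
	.chip[0]	= {
		.pullups	= 0x00ff,	/* ~100K pullups on IO 0-7 */
	},
	.irq_controller	= true,		/* expose the chip as an interrupt controller */
	.mirror		= true,		/* mirror the two interrupt outputs */
};

static struct i2c_board_info board_i2c_devices[] __initdata = {
	{
		I2C_BOARD_INFO("mcp23017", 0x20),
		.platform_data	= &board_mcp23017_pdata,
	},
};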
Signed-off-by: Sonic Zhang Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 1 - drivers/gpio/gpio-mcp23s08.c | 64 +++++++++++++++++++++++--------------------- include/linux/spi/mcp23s08.h | 18 +++++++++++++ 3 files changed, 52 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ec27ec0be8c2..ec398bee7c60 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -806,7 +806,6 @@ config GPIO_MAX7301 config GPIO_MCP23S08 tristate "Microchip MCP23xxx I/O expander" - depends on OF_GPIO depends on (SPI_MASTER && !I2C) || I2C help SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017 diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index 6f183d9b487e..8488e2fd307c 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -479,7 +479,7 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) mutex_init(&mcp->irq_lock); - mcp->irq_domain = irq_domain_add_linear(chip->of_node, chip->ngpio, + mcp->irq_domain = irq_domain_add_linear(chip->dev->of_node, chip->ngpio, &irq_domain_simple_ops, mcp); if (!mcp->irq_domain) return -ENODEV; @@ -581,7 +581,7 @@ done: static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, void *data, unsigned addr, unsigned type, - unsigned base, unsigned pullups) + struct mcp23s08_platform_data *pdata, int cs) { int status; bool mirror = false; @@ -635,7 +635,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, return -EINVAL; } - mcp->chip.base = base; + mcp->chip.base = pdata->base; mcp->chip.can_sleep = true; mcp->chip.dev = dev; mcp->chip.owner = THIS_MODULE; @@ -648,11 +648,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (status < 0) goto fail; - mcp->irq_controller = of_property_read_bool(mcp->chip.of_node, - "interrupt-controller"); + mcp->irq_controller = pdata->irq_controller; if (mcp->irq && mcp->irq_controller && (type == MCP_TYPE_017)) - mirror = of_property_read_bool(mcp->chip.of_node, - "microchip,irq-mirror"); + mirror = pdata->mirror; if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN) || mirror) { /* mcp23s17 has IOCON twice, make sure they are in sync */ @@ -668,7 +666,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } /* configure ~100K pullups */ - status = mcp->ops->write(mcp, MCP_GPPU, pullups); + status = mcp->ops->write(mcp, MCP_GPPU, pdata->chip[cs].pullups); if (status < 0) goto fail; @@ -768,25 +766,29 @@ MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match); static int mcp230xx_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; struct mcp23s08 *mcp; - int status, base, pullups; + int status; const struct of_device_id *match; match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match), &client->dev); - pdata = dev_get_platdata(&client->dev); - if (match || !pdata) { - base = -1; - pullups = 0; + if (match) { + pdata = &local_pdata; + pdata->base = -1; + pdata->chip[0].pullups = 0; + pdata->irq_controller = of_property_read_bool( + client->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(client->dev.of_node, + "microchip,irq-mirror"); client->irq = irq_of_parse_and_map(client->dev.of_node, 0); } else { - if (!gpio_is_valid(pdata->base)) { + pdata = dev_get_platdata(&client->dev); + if (!pdata || !gpio_is_valid(pdata->base)) { dev_dbg(&client->dev, 
"invalid platform data\n"); return -EINVAL; } - base = pdata->base; - pullups = pdata->chip[0].pullups; } mcp = kzalloc(sizeof(*mcp), GFP_KERNEL); @@ -795,7 +797,7 @@ static int mcp230xx_probe(struct i2c_client *client, mcp->irq = client->irq; status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr, - id->driver_data, base, pullups); + id->driver_data, pdata, 0); if (status) goto fail; @@ -863,14 +865,12 @@ static void mcp23s08_i2c_exit(void) { } static int mcp23s08_probe(struct spi_device *spi) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; unsigned addr; int chips = 0; struct mcp23s08_driver_data *data; int status, type; - unsigned base = -1, - ngpio = 0, - pullups[ARRAY_SIZE(pdata->chip)]; + unsigned ngpio = 0; const struct of_device_id *match; u32 spi_present_mask = 0; @@ -893,11 +893,18 @@ static int mcp23s08_probe(struct spi_device *spi) return -ENODEV; } + pdata = &local_pdata; + pdata->base = -1; for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { - pullups[addr] = 0; + pdata->chip[addr].pullups = 0; if (spi_present_mask & (1 << addr)) chips++; } + pdata->irq_controller = of_property_read_bool( + spi->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(spi->dev.of_node, + "microchip,irq-mirror"); } else { type = spi_get_device_id(spi)->driver_data; pdata = dev_get_platdata(&spi->dev); @@ -917,10 +924,7 @@ static int mcp23s08_probe(struct spi_device *spi) return -EINVAL; } spi_present_mask |= 1 << addr; - pullups[addr] = pdata->chip[addr].pullups; } - - base = pdata->base; } if (!chips) @@ -938,13 +942,13 @@ static int mcp23s08_probe(struct spi_device *spi) chips--; data->mcp[addr] = &data->chip[chips]; status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi, - 0x40 | (addr << 1), type, base, - pullups[addr]); + 0x40 | (addr << 1), type, pdata, + addr); if (status < 0) goto fail; - if (base != -1) - base += (type == MCP_TYPE_S17) ? 16 : 8; + if (pdata->base != -1) + pdata->base += (type == MCP_TYPE_S17) ? 16 : 8; ngpio += (type == MCP_TYPE_S17) ? 16 : 8; } data->ngpio = ngpio; diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 2d676d5aaa89..aa07d7b32568 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -22,4 +22,22 @@ struct mcp23s08_platform_data { * base to base+15 (or base+31 for s17 variant). */ unsigned base; + /* Marks the device as a interrupt controller. + * NOTE: The interrupt functionality is only supported for i2c + * versions of the chips. The spi chips can also do the interrupts, + * but this is not supported by the linux driver yet. + */ + bool irq_controller; + + /* Sets the mirror flag in the IOCON register. Devices + * with two interrupt outputs (these are the devices ending with 17 and + * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and + * IO 8-15 are bank 2. These chips have two different interrupt outputs: + * One for bank 1 and another for bank 2. If irq-mirror is set, both + * interrupts are generated regardless of the bank that an input change + * occurred on. If it is not set, the interrupt are only generated for + * the bank they belong to. + * On devices with only one interrupt output this property is useless. 
+ */ + bool mirror; }; -- cgit v1.2.3 From 0d37899363b0e5486f8800231b7edd75e8b60942 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Sep 2014 20:01:55 +0200 Subject: pinctrl: generic: Fix PIN_CONFIG_DRIVE_OPEN_SOURCE source/drain doc mismatch PIN_CONFIG_DRIVE_OPEN_SOURCE enables open source, not open drain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a15f10727eb8..d578a60eff23 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -57,7 +57,7 @@ * which are then pulled up with an external resistor. Setting this * config will enable open drain mode, the argument is ignored. * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source - * (open emitter). Setting this config will enable open drain mode, the + * (open emitter). Setting this config will enable open source mode, the * argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. -- cgit v1.2.3 From 87fed556d08d21dd7dd3e0222c94c187e4c2d5e2 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 3 Sep 2014 10:35:13 +0200 Subject: bcma: get info about flash type SoC booted from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is ongoing work on cleaning up MIPS's nvram support so it can be re-used on other platforms (bcm53xx, to be precise). This will require a bit of extra logic in bcma, which this patch implements. Signed-off-by: Rafał Miłecki Signed-off-by: John W.
Linville --- drivers/bcma/driver_mips.c | 62 ++++++++++++++++++++++++++++++++++++++++++ include/linux/bcma/bcma_regs.h | 5 ++++ 2 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 11115bbe115c..004d6aa671ce 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -21,6 +21,14 @@ #include #include +enum bcma_boot_dev { + BCMA_BOOT_DEV_UNK = 0, + BCMA_BOOT_DEV_ROM, + BCMA_BOOT_DEV_PARALLEL, + BCMA_BOOT_DEV_SERIAL, + BCMA_BOOT_DEV_NAND, +}; + static const char * const part_probes[] = { "bcm47xxpart", NULL }; static struct physmap_flash_data bcma_pflash_data = { @@ -229,11 +237,51 @@ u32 bcma_cpu_clock(struct bcma_drv_mips *mcore) } EXPORT_SYMBOL(bcma_cpu_clock); +static enum bcma_boot_dev bcma_boot_dev(struct bcma_bus *bus) +{ + struct bcma_drv_cc *cc = &bus->drv_cc; + u8 cc_rev = cc->core->id.rev; + + if (cc_rev == 42) { + struct bcma_device *core; + + core = bcma_find_core(bus, BCMA_CORE_NS_ROM); + if (core) { + switch (bcma_aread32(core, BCMA_IOST) & + BCMA_NS_ROM_IOST_BOOT_DEV_MASK) { + case BCMA_NS_ROM_IOST_BOOT_DEV_NOR: + return BCMA_BOOT_DEV_SERIAL; + case BCMA_NS_ROM_IOST_BOOT_DEV_NAND: + return BCMA_BOOT_DEV_NAND; + case BCMA_NS_ROM_IOST_BOOT_DEV_ROM: + default: + return BCMA_BOOT_DEV_ROM; + } + } + } else { + if (cc_rev == 38) { + if (cc->status & BCMA_CC_CHIPST_5357_NAND_BOOT) + return BCMA_BOOT_DEV_NAND; + else if (cc->status & BIT(5)) + return BCMA_BOOT_DEV_ROM; + } + + if ((cc->capabilities & BCMA_CC_CAP_FLASHT) == + BCMA_CC_FLASHT_PARA) + return BCMA_BOOT_DEV_PARALLEL; + else + return BCMA_BOOT_DEV_SERIAL; + } + + return BCMA_BOOT_DEV_SERIAL; +} + static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) { struct bcma_bus *bus = mcore->core->bus; struct bcma_drv_cc *cc = &bus->drv_cc; struct bcma_pflash *pflash = &cc->pflash; + enum bcma_boot_dev boot_dev; switch (cc->capabilities & BCMA_CC_CAP_FLASHT) { case BCMA_CC_FLASHT_STSER: @@ -269,6 +317,20 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) bcma_nflash_init(cc); } } + + /* Determine flash type this SoC boots from */ + boot_dev = bcma_boot_dev(bus); + switch (boot_dev) { + case BCMA_BOOT_DEV_PARALLEL: + case BCMA_BOOT_DEV_SERIAL: + /* TODO: Init NVRAM using BCMA_SOC_FLASH2 window */ + break; + case BCMA_BOOT_DEV_NAND: + /* TODO: Init NVRAM using BCMA_SOC_FLASH1 window */ + break; + default: + break; + } } void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index 917dcd7965e7..e64ae7bf80a1 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -39,6 +39,11 @@ #define BCMA_RESET_CTL_RESET 0x0001 #define BCMA_RESET_ST 0x0804 +#define BCMA_NS_ROM_IOST_BOOT_DEV_MASK 0x0003 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NOR 0x0000 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NAND 0x0001 +#define BCMA_NS_ROM_IOST_BOOT_DEV_ROM 0x0002 + /* BCMA PCI config space registers. */ #define BCMA_PCI_PMCSR 0x44 #define BCMA_PCI_PE 0x100 -- cgit v1.2.3 From 8d38821cbcf51292cd5a23469d03bd38932a3ba9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Aug 2014 14:15:10 +0200 Subject: resources: Add device-managed request/release_resource() Provide device-managed implementations of the request_resource() and release_resource() functions. Upon failure to request a resource, the new devm_request_resource() function will output an error message for consistent error reporting. 
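As a usage illustration, a driver probe path might reserve a fixed register window like this; the foo_* names and the address range are invented, and iomem_resource is used as the usual root for memory-mapped ranges:

static struct resource foo_regs = {
	.name	= "foo-regs",
	.start	= 0x10000000,	/* invented example range */
	.end	= 0x10000fff,
	.flags	= IORESOURCE_MEM,
};

static int foo_probe(struct platform_device *pdev)
{
	int err;

	/*
	 * Reserve the range against iomem_resource. It is released
	 * automatically when the device is unbound, and a conflict has
	 * already been logged by devm_request_resource() itself.
	 */
	err = devm_request_resource(&pdev->dev, &iomem_resource, &foo_regs);
	if (err)
		return err;

	return 0;
}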
Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Acked-by: Tejun Heo --- Documentation/driver-model/devres.txt | 2 + include/linux/ioport.h | 5 +++ kernel/resource.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index d14710b04439..befc3fe12ba6 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -264,8 +264,10 @@ IIO IO region devm_release_mem_region() devm_release_region() + devm_release_resource() devm_request_mem_region() devm_request_region() + devm_request_resource() IOMAP devm_ioport_map() diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 142ec544167c..2c5250222278 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -215,6 +215,11 @@ static inline int __deprecated check_region(resource_size_t s, /* Wrappers for managed devices */ struct device; + +extern int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new); +extern void devm_release_resource(struct device *dev, struct resource *new); + #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ diff --git a/kernel/resource.c b/kernel/resource.c index da14b8d09296..ca24f19f9d18 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1248,6 +1248,76 @@ int release_mem_region_adjustable(struct resource *parent, /* * Managed region resource */ +static void devm_resource_release(struct device *dev, void *ptr) +{ + struct resource **r = ptr; + + release_resource(*r); +} + +/** + * devm_request_resource() - request and reserve an I/O or memory resource + * @dev: device for which to request the resource + * @root: root of the resource tree from which to request the resource + * @new: descriptor of the resource to request + * + * This is a device-managed version of request_resource(). There is usually + * no need to release resources requested by this function explicitly since + * that will be taken care of when the device is unbound from its driver. + * If for some reason the resource needs to be released explicitly, because + * of ordering issues for example, drivers must call devm_release_resource() + * rather than the regular release_resource(). + * + * When a conflict is detected between any existing resources and the newly + * requested resource, an error message will be printed. + * + * Returns 0 on success or a negative error code on failure. 
+ */ +int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new) +{ + struct resource *conflict, **ptr; + + ptr = devres_alloc(devm_resource_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + *ptr = new; + + conflict = request_resource_conflict(root, new); + if (conflict) { + dev_err(dev, "resource collision: %pR conflicts with %s %pR\n", + new, conflict->name, conflict); + devres_free(ptr); + return -EBUSY; + } + + devres_add(dev, ptr); + return 0; +} +EXPORT_SYMBOL(devm_request_resource); + +static int devm_resource_match(struct device *dev, void *res, void *data) +{ + struct resource **ptr = res; + + return *ptr == data; +} + +/** + * devm_release_resource() - release a previously requested resource + * @dev: device for which to release the resource + * @new: descriptor of the resource to release + * + * Releases a resource previously requested using devm_request_resource(). + */ +void devm_release_resource(struct device *dev, struct resource *new) +{ + WARN_ON(devres_release(dev, devm_resource_release, devm_resource_match, + new)); +} +EXPORT_SYMBOL(devm_release_resource); + struct region_devres { struct resource *parent; resource_size_t start; -- cgit v1.2.3 From efd01a72e7ec99ed583151fbf16b176cd2158967 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 5 Aug 2014 14:08:55 +0200 Subject: PCI/AER: Make <linux/aer.h> standalone includable The header file references u16 and u32 types, but they are not defined in the header nor does the header pull in the necessary includes for them. This causes build breakage when the file is included without any of the dependencies being satisfied from somewhere else. Fix this by including linux/types.h (for u16 and u32). [bhelgaas: removed pci_dev declaration (already added by 5ccb8225abf2)] Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index c826d1c28f9c..4fef65e57023 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -7,6 +7,8 @@ #ifndef _AER_H_ #define _AER_H_ +#include <linux/types.h> + #define AER_NONFATAL 0 #define AER_FATAL 1 #define AER_CORRECTABLE 2 -- cgit v1.2.3 From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:04:42 -0400 Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers A buffer cache page is normally allocated from the movable area because it is referenced for a while and released soon. But some filesystems keep buffer cache pages for a long time, which can disturb page migration. New APIs are introduced to allocate the buffer cache with a caller-specified flag: the *_gfp APIs are for callers that want to pass extra page allocation flags for the page cache allocation, and the *_unmovable APIs are for callers that want the page cache allocated from the non-movable area.
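For illustration, a minimal sketch of the intended call pattern (not part of this patch; the helper and its policy flag are hypothetical):

	/* A filesystem that pins metadata buffers for a long time reads
	 * them through the _unmovable variant so the backing page cannot
	 * stall page migration; short-lived reads keep the movable
	 * default. */
	static struct buffer_head *foo_read_block(struct super_block *sb,
						  sector_t block,
						  bool long_lived)
	{
		if (long_lived)
			return sb_bread_unmovable(sb, block);	/* gfp = 0 */

		return sb_bread(sb, block);	/* adds __GFP_MOVABLE */
	}
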
Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/buffer.c | 45 +++++++++++++++++++++++++------------------ include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 8f05111bbb8b..9a6029e0dd71 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, */ static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size, int sizebits) + pgoff_t index, int size, int sizebits, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct page *page; @@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; - gfp_mask |= __GFP_MOVABLE; + gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + /* * XXX: __getblk_slow() can not really deal with failure and * will endlessly loop on improvised global reclaim. Prefer @@ -1058,7 +1058,7 @@ failed: * that page was dirty, the buffers are set dirty also. */ static int -grow_buffers(struct block_device *bdev, sector_t block, int size) +grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) { pgoff_t index; int sizebits; @@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } /* Create a page with the proper size buffers.. */ - return grow_dev_page(bdev, block, index, size, sizebits); + return grow_dev_page(bdev, block, index, size, sizebits, gfp); } -static struct buffer_head * -__getblk_slow(struct block_device *bdev, sector_t block, int size) +struct buffer_head * +__getblk_slow(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || @@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) if (bh) return bh; - ret = grow_buffers(bdev, block, size); + ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } } +EXPORT_SYMBOL(__getblk_slow); /* * The relationship between dirty buffers and dirty pages: @@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__find_get_block); /* - * __getblk will locate (and, if necessary, create) the buffer_head + * __getblk_gfp() will locate (and, if necessary, create) the buffer_head * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() - * attempt is failing. FIXME, perhaps? + * __getblk_gfp() will lock up the machine if grow_dev_page's + * try_to_free_buffers() attempt is failing. FIXME, perhaps? */ struct buffer_head * -__getblk(struct block_device *bdev, sector_t block, unsigned size) +__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_sleep(); if (bh == NULL) - bh = __getblk_slow(bdev, block, size); + bh = __getblk_slow(bdev, block, size, gfp); return bh; } -EXPORT_SYMBOL(__getblk); +EXPORT_SYMBOL(__getblk_gfp); /* * Do async read-ahead on a buffer.. 
@@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__breadahead); /** - * __bread() - reads a specified block and returns the bh + * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read - * + * @gfp: page allocation flag + * * Reads a specified block, and returns buffer head that contains it. + * The page cache can be allocated from non-movable area + * not to prevent page migration if you set gfp to zero. * It returns NULL if the block was unreadable. */ struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +__bread_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk(bdev, block, size); + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } -EXPORT_SYMBOL(__bread); +EXPORT_SYMBOL(__bread_gfp); /* * invalidate_bh_lrus() is called rarely - but not only at unmount. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329ceea1e..73b45225a7ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of 
block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. + */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Sep 2014 15:49:26 +0200 Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable As in IPv6 people might increase the igmp query robustness variable to make sure unsolicited state change reports aren't lost on the network. Add and document this new knob to igmp code. RFCs allow tuning this parameter back to first IGMP RFC, so we also use this setting for all counters, including source specific multicast. Also take over sysctl value when upping the interface and don't reuse the last one seen on the interface. Cc: Flavio Leitner Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 +++++ include/linux/igmp.h | 1 + net/ipv4/igmp.c | 31 +++++++++++++++---------------- net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index cfc71ac0f764..db2383cb1df9 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -844,6 +844,11 @@ igmp_max_memberships - INTEGER conf/all/* is special, changes the settings for all interfaces +igmp_qrv - INTEGER + Controls the IGMP query robustness variable (see RFC2236 8.1). + Default: 2 (as specified by RFC2236 8.1) + Minimum: 1 (as specified by RFC6636 4.5) + log_martians - BOOLEAN Log packets with impossible addresses to kernel log. log_martians for the interface will be enabled if at least one of diff --git a/include/linux/igmp.h b/include/linux/igmp.h index f47550d75f85..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,6 +39,7 @@ static inline struct igmpv3_query * extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; +extern int sysctl_igmp_qrv; struct ip_sf_socklist { unsigned int sl_max; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 890c4258804c..4146153d875d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -117,7 +117,7 @@ #define IGMP_V2_Unsolicited_Report_Interval (10*HZ) #define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) -#define IGMP_Unsolicited_Report_Count 2 +#define IGMP_Query_Robustness_Variable 2 #define IGMP_Initial_Report_Delay (1) @@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->unsolicit_count = sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; + in_dev->mr_qrv = sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST + in_dev->mr_qrv = sysctl_igmp_qrv; +#endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) @@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) */ int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; - +#ifdef CONFIG_IP_MULTICAST +int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable; +#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c52558..45d156dacd61 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, -- cgit v1.2.3 From 6b44f519017b219a12b37173c7eef8dfce2c0100 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Sun, 24 Aug 2014 17:12:27 +0000 Subject: sched/wait: Document timeout corner case The timeout may elapse without 0 being returned, such as when waiting on an unused queue. Document this possibility. Signed-off-by: Scot Doyle Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1408241710070.6462@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/wait.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 6fb1ba5f9b2f..034f6fc7c65f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -280,9 +280,11 @@ do { \ * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * - * The function returns 0 if the @timeout elapsed, or the remaining - * jiffies (at least 1) if the @condition evaluated to %true before - * the @timeout elapsed. + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. */ #define wait_event_timeout(wq, condition, timeout) \ ({ \ @@ -363,9 +365,11 @@ do { \ * change the result of the wait condition. * * Returns: - * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by - * a signal, or the remaining jiffies (at least 1) if the @condition - * evaluated to %true before the @timeout elapsed. + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a signal. */ #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ -- cgit v1.2.3 From 2740f0cf8ec8bc7ee6a58f68841759e367dda98f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Sep 2014 15:24:58 +0300 Subject: cfg80211: add Intel Mobile Communications copyright Our legal structure changed at some point (see wikipedia), but we forgot to immediately switch over to the new copyright notice. For files that we have modified in the time since the change, add the proper copyright notice now. 
Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/cfg80211.h | 1 + net/wireless/chan.c | 1 + net/wireless/core.c | 1 + net/wireless/nl80211.c | 1 + net/wireless/reg.c | 1 + net/wireless/scan.c | 1 + net/wireless/util.c | 1 + 8 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8018c915ee63..77d4f26e0235 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -6,6 +6,7 @@ * Copyright (c) 2002-2003, Jouni Malinen * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu + * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ab21299c8f4d..6be68bb31918 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4,6 +4,7 @@ * 802.11 device and configuration interface * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 992b34070bcb..72d81e2154d5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -4,6 +4,7 @@ * any point in time. * * Copyright 2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/core.c b/net/wireless/core.c index a207eb116829..9698fe709251 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -2,6 +2,7 @@ * This is the linux wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3011401f52c0..d876146498dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2,6 +2,7 @@ * This is the new netlink-based wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1afdf45db38f..8bfc9b172a49 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3,6 +3,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg * Copyright 2008-2011 Luis R. 
Rodriguez + * Copyright 2013-2014 Intel Mobile Communications GmbH + * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 620a4b40d466..bda39f149810 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2,6 +2,7 @@ * cfg80211 scan result handling * * Copyright 2008 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include diff --git a/net/wireless/util.c b/net/wireless/util.c index 728f1c0dc70d..a8b2816ffcb9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2,6 +2,7 @@ * Wireless utility functions * * Copyright 2007-2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include -- cgit v1.2.3 From 7d75a871888e3f5e1a7c99bf240d1cd67d8bdfa0 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 5 Sep 2014 13:09:25 +0200 Subject: gpio: fix 'CONFIG_GPIO_IRQCHIP' comments These two typos were introduced in commit 1425052097b5 ("gpio: add IRQ chip helpers in gpiolib"). The correct symbol name is CONFIG_GPIOLIB_IRQCHIP. [jkosina@suse.cz: add changelog] Signed-off-by: Paul Bolle Signed-off-by: Jiri Kosina --- include/linux/gpio/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 573e4f3243d0..4fcd60913a42 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -105,7 +105,7 @@ struct gpio_chip { #ifdef CONFIG_GPIOLIB_IRQCHIP /* - * With CONFIG_GPIO_IRQCHIP we get an irqchip inside the gpiolib + * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ struct irq_chip *irqchip; @@ -221,7 +221,7 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type); -#endif /* CONFIG_GPIO_IRQCHIP */ +#endif /* CONFIG_GPIOLIB_IRQCHIP */ #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Sep 2014 22:53:44 +0200 Subject: net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and its exposure to user space on the way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to-be-interpreted bytecode to read-only pages. If we were to execute a corrupted BPF interpreter image for some reason, e.g. one placed by an attacker who got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can set up the image on pages made immutable in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup, bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including migrated classic BPF) has to call bpf_prog_select_runtime() to select either the interpreter or a JIT image as a last setup step, and they are all freed via bpf_prog_free(), including non-JIT.
Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 3 +- arch/mips/net/bpf_jit.c | 3 +- arch/powerpc/net/bpf_jit_comp.c | 3 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 3 +- arch/x86/net/bpf_jit_comp.c | 18 ++++------ include/linux/filter.h | 49 ++++++++++++++++++++++--- kernel/bpf/core.c | 80 +++++++++++++++++++++++++++++++++++++++-- kernel/seccomp.c | 7 ++-- lib/test_bpf.c | 2 +- net/core/filter.c | 6 ++-- 11 files changed, 144 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a37b989a2f91..a76623bcf722 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 05a56619ece2..cfa83cf2447d 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 3afa6f4c1957..40c53ff59124 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 61e45b7c04d7..f2833c5b218a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp) module_free(NULL, header); free_filter: - kfree(fp); + bpf_prog_unlock_free(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 1f76c22a6a75..f7a736b645e8 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b08a98c59530..39ccfbb4a723 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -972,23 +972,17 @@ out: kfree(addrs); } -static void bpf_jit_free_deferred(struct work_struct *work) +void bpf_jit_free(struct bpf_prog *fp) { - struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; + if (!fp->jited) + goto free_filter; + set_memory_rw(addr, header->pages); module_free(NULL, header); - kfree(fp); -} -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) { - INIT_WORK(&fp->work, bpf_jit_free_deferred); - schedule_work(&fp->work); - } else { - kfree(fp); - } +free_filter: + bpf_prog_unlock_free(fp); } 
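Before the header changes below, a hedged sketch of the resulting program life-cycle (illustrative only, not kernel code; error handling elided, helpers as introduced by this patch):

	static struct bpf_prog *foo_build_prog(unsigned int flen)
	{
		struct bpf_prog *fp;

		/* page-rounded, vmalloc-backed allocation replacing the
		 * former kmalloc */
		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
		if (!fp)
			return NULL;

		fp->len = flen;
		/* ... fill fp->insnsi[] with instructions ... */

		/* last setup step: probes the JIT, then makes the whole
		 * image read-only via bpf_prog_lock_ro() */
		bpf_prog_select_runtime(fp);

		/* torn down later with bpf_prog_free(), which defers the
		 * unlock and vfree/module_free to a workqueue */
		return fp;
	}
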
diff --git a/include/linux/filter.h b/include/linux/filter.h index a5227ab8ccb1..c78994593355 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,6 +9,11 @@ #include #include #include +#include + +struct sk_buff; +struct sock; +struct seccomp_data; /* Internally used and optimized filter representation with extended * instruction set based on top of classic BPF. @@ -320,20 +325,23 @@ struct sock_fprog_kern { struct sock_filter *filter; }; -struct sk_buff; -struct sock; -struct seccomp_data; +struct bpf_work_struct { + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_prog { + u32 pages; /* Number of allocated pages */ u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); + /* Instructions for interpreter */ union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; - struct work_struct work; }; }; @@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ + set_memory_ro((unsigned long)fp, fp->pages); +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ + set_memory_rw((unsigned long)fp, fp->pages); +} +#else +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ +} +#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); @@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags); +void __bpf_prog_free(struct bpf_prog *fp); + +static inline void bpf_prog_unlock_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_ro(fp); + __bpf_prog_free(fp); +} + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); @@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp) static inline void bpf_jit_free(struct bpf_prog *fp) { - kfree(fp); + bpf_prog_unlock_free(fp); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f0dbcbb34af..b54bb2c2e494 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -22,6 +22,7 @@ */ #include #include +#include #include /* Registers */ @@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_work_struct *ws; + struct bpf_prog *fp; + + size = round_up(size, PAGE_SIZE); + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp == NULL) + return NULL; + + ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags); + if (ws == NULL) { + vfree(fp); + return NULL; + } + + fp->pages = size / PAGE_SIZE; + fp->work = ws; + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_alloc); + +struct bpf_prog *bpf_prog_realloc(struct bpf_prog 
*fp_old, unsigned int size, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + BUG_ON(fp_old == NULL); + + size = round_up(size, PAGE_SIZE); + if (size <= fp_old->pages * PAGE_SIZE) + return fp_old; + + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); + fp->pages = size / PAGE_SIZE; + + /* We keep fp->work from fp_old around in the new + * reallocated structure. + */ + fp_old->work = NULL; + __bpf_prog_free(fp_old); + } + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_realloc); + +void __bpf_prog_free(struct bpf_prog *fp) +{ + kfree(fp->work); + vfree(fp); +} +EXPORT_SYMBOL_GPL(__bpf_prog_free); + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. @@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); + /* Lock whole bpf_prog as read-only */ + bpf_prog_lock_ro(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); -/* free internal BPF program */ +static void bpf_prog_free_deferred(struct work_struct *work) +{ + struct bpf_work_struct *ws; + + ws = container_of(work, struct bpf_work_struct, work); + bpf_jit_free(ws->prog); +} + +/* Free internal BPF program */ void bpf_prog_free(struct bpf_prog *fp) { - bpf_jit_free(fp); + struct bpf_work_struct *ws = fp->work; + + INIT_WORK(&ws->work, bpf_prog_free_deferred); + ws->prog = fp; + schedule_work(&ws->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..84922befea84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(bpf_prog_size(new_len), - GFP_KERNEL|__GFP_NOWARN); + filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); if (!filter->prog) goto free_filter; ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; - kfree(fp); + kfree(fp); atomic_set(&filter->usage, 1); filter->prog->len = new_len; @@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return filter; free_filter_prog: - kfree(filter->prog); + __bpf_prog_free(filter->prog); free_filter: kfree(filter); free_prog: diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8c66c6aace04..9a67456ba29a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(flen), 0); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index d814b8a89d0f..37f8eb06fdee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); + fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. 
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; @@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - prog = kmalloc(bpf_fsize, GFP_KERNEL); + prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) return -ENOMEM; -- cgit v1.2.3 From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 3 Sep 2014 03:17:20 +0530 Subject: ethtool: Add generic options for tunables This patch adds two new ethtool commands, ETHTOOL_GTUNABLE and ETHTOOL_STUNABLE, for getting and setting tunable values from a driver. Add get_tunable and set_tunable to ethtool_ops; a driver implements these callbacks to get/set its tunable values. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 +++ include/uapi/linux/ethtool.h | 28 +++++++++++++++ net/core/ethtool.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e658229fee39..c1a2d60dfb82 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -257,6 +257,10 @@ struct ethtool_ops { struct ethtool_eeprom *, u8 *); int (*get_eee)(struct net_device *, struct ethtool_eee *); int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_tunable)(struct net_device *, + const struct ethtool_tunable *, void *); + int (*set_tunable)(struct net_device *, + const struct ethtool_tunable *, const void *); }; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e3c7a719c76b..7a364f2f3d3f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -209,6 +209,32 @@ struct ethtool_value { __u32 data; }; +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + __u32 cmd; + __u32 id; + __u32 type_id; + __u32 len; + void *data[0]; +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -1152,6 +1178,8 enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17cb912793fa..27e61b886520 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } +static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) +{ + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + if (tuna->len != sizeof(u32) || + tuna->type_id != ETHTOOL_TUNABLE_U32) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ethtool_get_tunable(struct
net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->get_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + ret = ops->get_tunable(dev, &tuna, data); + if (ret) + goto out; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_to_user(useraddr, data, tuna.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->set_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_from_user(data, useraddr, tuna.len)) + goto out; + ret = ops->set_tunable(dev, &tuna, data); + +out: + kfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: + case ETHTOOL_GTUNABLE: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GMODULEEEPROM: rc = ethtool_get_module_eeprom(dev, useraddr); break; + case ETHTOOL_GTUNABLE: + rc = ethtool_get_tunable(dev, useraddr); + break; + case ETHTOOL_STUNABLE: + rc = ethtool_set_tunable(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 62bccb8cdb69051b95a55ab0c489e3cab261c8ef Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 4 Sep 2014 13:31:35 -0400 Subject: net-timestamp: Make the clone operation stand-alone from phy timestamping The phy timestamping takes a different path than the regular timestamping does in that it will create a clone first so that the packets needing to be timestamped can be placed in a queue, or the context block could be used. In order to support these use cases I am pulling the core of the code out so it can be used in other drivers beyond just phy devices. In addition I have added a destructor named sock_efree which is meant to provide a simple way for dropping the reference to skb exceptions that aren't part of either the receive or send windows for the socket, and I have removed some duplication in spots where this destructor could be used in place of sock_edemux. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- drivers/net/phy/dp83640.c | 6 +++--- include/linux/skbuff.h | 2 ++ include/net/sock.h | 1 + net/core/skbuff.c | 32 +++++++++++++++++++++++++------- net/core/sock.c | 6 ++++++ net/core/timestamping.c | 14 +++----------- 6 files changed, 40 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index d5991ac46ab0..87648b306551 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1148,7 +1148,7 @@ static void dp83640_remove(struct phy_device *phydev) kfree_skb(skb); while ((skb = skb_dequeue(&dp83640->tx_queue)) != NULL) - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); clock = dp83640_clock_get(dp83640->clock); @@ -1405,7 +1405,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_ONESTEP_SYNC: if (is_sync(skb, type)) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); return; } /* fall through */ @@ -1416,7 +1416,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_OFF: default: - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); break; } } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 02529fcad1ac..1cf0cfaef10a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2690,6 +2690,8 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +struct sk_buff *skb_clone_sk(struct sk_buff *skb); + #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 3fde6130789d..e02be37a3d91 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1574,6 +1574,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, void sock_wfree(struct sk_buff *skb); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); +void sock_efree(struct sk_buff *skb); void sock_edemux(struct sk_buff *skb); int sock_setsockopt(struct socket *sock, int level, int op, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 697e696c914b..a936a401564e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3511,6 +3511,27 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) } EXPORT_SYMBOL(sock_dequeue_err_skb); +struct sk_buff *skb_clone_sk(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sk_buff *clone; + + if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + return NULL; + + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return NULL; + } + + clone->sk = sk; + clone->destructor = sock_efree; + + return clone; +} +EXPORT_SYMBOL(skb_clone_sk); + static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype) @@ -3540,14 +3561,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, { struct sock *sk = skb->sk; - skb->sk = NULL; + /* take a reference to prevent skb_orphan() from freeing the socket */ + sock_hold(sk); - if (hwtstamps) { - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - } else { - kfree_skb(skb); - } + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index f1a638ee93d9..d04005c51724 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1637,6 +1637,12 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_efree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_efree); + void sock_edemux(struct 
sk_buff *skb) { struct sock *sk = skb->sk; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index f48a59fd8f39..43d3dd62fcc8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -36,10 +36,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) { struct phy_device *phydev; struct sk_buff *clone; - struct sock *sk = skb->sk; unsigned int type; - if (!sk) + if (!skb->sk) return; type = classify(skb); @@ -48,16 +47,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) + clone = skb_clone_sk(skb); + if (!clone) return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } } -- cgit v1.2.3 From 56193d1bce2b2759cb4bdcc00cd05544894a0c90 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 5 Sep 2014 19:20:26 -0400 Subject: net: Add function for parsing the header length out of linear ethernet frames This patch updates some of the flow_dissector api so that it can be used to parse the length of ethernet buffers stored in fragments. Most of the changes needed were to __skb_get_poff as it needed to be updated to support sending a linear buffer instead of a skb. I have split __skb_get_poff into two functions, the first is skb_get_poff and it retains the functionality of the original __skb_get_poff. The other function is __skb_get_poff which now works much like __skb_flow_dissect in relation to skb_flow_dissect in that it provides the same functionality but works with just a data buffer and hlen instead of needing an skb. Signed-off-by: Alexander Duyck Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 + include/linux/skbuff.h | 4 +++- include/net/flow_keys.h | 2 ++ net/core/filter.c | 2 +- net/core/flow_dissector.c | 46 +++++++++++++++++++++++++++++++-------------- net/ethernet/eth.c | 27 ++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9c5529dc6d07..733980fce8e3 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,7 @@ #include #ifdef __KERNEL__ +u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1cf0cfaef10a..07c9fdd0c126 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3218,7 +3218,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); -u32 __skb_get_poff(const struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /** * skb_head_is_locked - Determine if the skb->head is locked down diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 9a03f73c4974..7ee2df083542 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -40,4 +40,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); } u32 flow_hash_from_keys(struct flow_keys *keys); +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); #endif diff 
--git a/net/core/filter.c b/net/core/filter.c index 37f8eb06fdee..fa5b7d0f77ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void) static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff((struct sk_buff *)(unsigned long) ctx); } static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 12f48ca7a0b0..8560dea58803 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include #include #include +#include /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; @@ -117,6 +118,13 @@ ipv6: flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + /* skip the flow label processing if skb is NULL. The + * assumption here is that if there is no skb we are not + * looking for flow info as much as we are length. + */ + if (!skb) + break; + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -165,6 +173,9 @@ ipv6: return false; } } + case htons(ETH_P_FCOE): + flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + /* fall through */ default: return false; } @@ -316,26 +327,18 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); -/* __skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically - * truncate packets without needing to push actual payload to the user - * space and can analyze headers only, instead. - */ -u32 __skb_get_poff(const struct sk_buff *skb) +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen) { - struct flow_keys keys; - u32 poff = 0; + u32 poff = keys->thoff; - if (!skb_flow_dissect(skb, &keys)) - return 0; - - poff += keys.thoff; - switch (keys.ip_proto) { + switch (keys->ip_proto) { case IPPROTO_TCP: { const struct tcphdr *tcph; struct tcphdr _tcph; - tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), + data, hlen, &_tcph); if (!tcph) return poff; @@ -369,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } +/* skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); +} + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5cebca134585..33a140e15834 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -145,6 +145,33 @@ int eth_rebuild_header(struct sk_buff *skb) } EXPORT_SYMBOL(eth_rebuild_header); +/** + * eth_get_headlen - determine the length of header for an ethernet frame + * @data: pointer to start of frame + * @len: total length of frame + * + * Make a best effort attempt to pull the length for all of the headers for + * a given frame in a linear buffer.
+ */ +u32 eth_get_headlen(void *data, unsigned int len) +{ + const struct ethhdr *eth = (const struct ethhdr *)data; + struct flow_keys keys; + + /* this should never happen, but better safe than sorry */ + if (len < sizeof(*eth)) + return len; + + /* parse any remaining L2/L3 headers, check for L4 */ + if (!__skb_flow_dissect(NULL, &keys, data, + eth->h_proto, sizeof(*eth), len)) + return max_t(u32, keys.thoff, sizeof(*eth)); + + /* parse for any L4 headers */ + return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); +} +EXPORT_SYMBOL(eth_get_headlen); + /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data -- cgit v1.2.3 From dec38b5ce6a9edb406c60c2670b26a1a4262fdb9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 6 Sep 2014 01:11:12 +0100 Subject: regulator: isl9305: Add Intersil ISL9305/H driver The ISL9305 and ISL9305H are mini-PMICs offering two DCDC regulators and two LDO regulators. While there are some register differences between them these do not affect the current Linux driver as the relevant features are not yet supported. Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/isl9305.txt | 36 +++ drivers/regulator/Kconfig | 6 + drivers/regulator/Makefile | 1 + drivers/regulator/isl9305.c | 247 +++++++++++++++++++++ include/linux/platform_data/isl9305.h | 30 +++ 5 files changed, 320 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/isl9305.txt create mode 100644 drivers/regulator/isl9305.c create mode 100644 include/linux/platform_data/isl9305.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt new file mode 100644 index 000000000000..a626fc1bbf0d --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/isl9305.txt @@ -0,0 +1,36 @@ +Intersil ISL9305/ISL9305H voltage regulator + +Required properties: + +- compatible: "isl,isl9305" or "isl,isl9305h" +- reg: I2C slave address, usually 0x68. +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values being "dcd1", "dcd2", "ldo1" and "ldo2". The content of each sub-node + is defined by the standard binding for regulators; see regulator.txt. +- VINDCD1-supply: A phandle to a regulator node supplying VINDCD1. + VINDCD2-supply: A phandle to a regulator node supplying VINDCD2. + VINLDO1-supply: A phandle to a regulator node supplying VINLDO1. + VINLDO2-supply: A phandle to a regulator node supplying VINLDO2. + +Optional properties: +- Per-regulator optional properties are defined in regulator.txt + +Example + + pmic: isl9305@68 { + compatible = "isl,isl9305"; + reg = <0x68>; + + VINDCD1-supply = <&system_power>; + VINDCD2-supply = <&system_power>; + VINLDO1-supply = <&system_power>; + VINLDO2-supply = <&system_power>; + + regulators { + dcd1 { + regulator-name = "VDD_DSP"; + regulator-always-on; + }; + }; + }; diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e6b98ed4c12f..3c8b6f401e4f 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -250,6 +250,12 @@ config REGULATOR_HI6421 21 general purpose LDOs, 3 dedicated LDOs, and 5 BUCKs. All of them come with support to either ECO (idle) or sleep mode. +config REGULATOR_ISL9305 + tristate "Intersil ISL9305 regulator" + depends on I2C + help + This driver supports ISL9305 voltage regulator chip. 
+ config REGULATOR_ISL6271A tristate "Intersil ISL6271A Power regulator" depends on I2C diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 5513e2c141b1..e12fee8c943b 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o obj-$(CONFIG_REGULATOR_HI6421) += hi6421-regulator.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o +obj-$(CONFIG_REGULATOR_ISL9305) += isl9305.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o diff --git a/drivers/regulator/isl9305.c b/drivers/regulator/isl9305.c new file mode 100644 index 000000000000..b0d12d186b68 --- /dev/null +++ b/drivers/regulator/isl9305.c @@ -0,0 +1,247 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Registers + */ +#define ISL9305_DCD1OUT 0x0 +#define ISL9305_DCD2OUT 0x1 +#define ISL9305_LDO1OUT 0x2 +#define ISL9305_LDO2OUT 0x3 +#define ISL9305_DCD_PARAMETER 0x4 +#define ISL9305_SYSTEM_PARAMETER 0x5 +#define ISL9305_DCD_SRCTL 0x6 + +#define ISL9305_MAX_REG ISL9305_DCD_SRCTL + +/* + * DCD_PARAMETER + */ +#define ISL9305_DCD_PHASE 0x40 +#define ISL9305_DCD2_ULTRA 0x20 +#define ISL9305_DCD1_ULTRA 0x10 +#define ISL9305_DCD2_BLD 0x08 +#define ISL9305_DCD1_BLD 0x04 +#define ISL9305_DCD2_MODE 0x02 +#define ISL9305_DCD1_MODE 0x01 + +/* + * SYSTEM_PARAMETER + */ +#define ISL9305_I2C_EN 0x40 +#define ISL9305_DCDPOR_MASK 0x30 +#define ISL9305_LDO2_EN 0x08 +#define ISL9305_LDO1_EN 0x04 +#define ISL9305_DCD2_EN 0x02 +#define ISL9305_DCD1_EN 0x01 + +/* + * DCD_SRCTL + */ +#define ISL9305_DCD2SR_MASK 0xc0 +#define ISL9305_DCD1SR_MASK 0x07 + +static const struct regulator_ops isl9305_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_linear, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, +}; + +static const struct regulator_desc isl9305_regulators[] = { + [ISL9305_DCD1] = { + .name = "DCD1", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD1OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD1_EN, + .supply_name = "VINDCD1", + .ops = &isl9305_ops, + }, + [ISL9305_DCD2] = { + .name = "DCD2", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD2OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD2_EN, + .supply_name = "VINDCD2", + .ops = &isl9305_ops, + }, + [ISL9305_LDO1] = { + .name = "LDO1", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = ISL9305_LDO1OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO1_EN, + .supply_name = "VINLDO1", + .ops = &isl9305_ops, + }, + [ISL9305_LDO2] = { + .name = "LDO2", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = 
ISL9305_LDO2OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO2_EN, + .supply_name = "VINLDO2", + .ops = &isl9305_ops, + }, +}; + +#ifdef CONFIG_OF +static struct of_regulator_match isl9305_reg_matches[] = { + [ISL9305_DCD1] = { .name = "dcd1" }, + [ISL9305_DCD2] = { .name = "dcd2" }, + [ISL9305_LDO1] = { .name = "ldo1" }, + [ISL9305_LDO2] = { .name = "ldo2" }, +}; + +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + struct device_node *node = i2c->dev.of_node; + struct of_regulator_match *matches; + struct device_node *regs; + int count; + + regs = of_get_child_by_name(node, "regulators"); + if (!regs) + return NULL; + + matches = devm_kmemdup(&i2c->dev, isl9305_reg_matches, + sizeof(isl9305_reg_matches), GFP_KERNEL); + if (!matches) + return NULL; + + count = of_regulator_match(&i2c->dev, regs, matches, + ARRAY_SIZE(isl9305_reg_matches)); + of_node_put(regs); + if ((count < 0) || (count > ARRAY_SIZE(isl9305_reg_matches))) + return NULL; + + return matches; +} +#else +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + return NULL; +} +#endif + +static const struct regmap_config isl9305_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = ISL9305_MAX_REG, + .cache_type = REGCACHE_RBTREE, +}; + +static int isl9305_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct regulator_config config = { }; + struct isl9305_pdata *pdata = i2c->dev.platform_data; + struct of_regulator_match *of_matches; + struct regulator_dev *rdev; + struct regmap *regmap; + int i, ret; + + of_matches = isl9305_parse_dt(i2c); + + regmap = devm_regmap_init_i2c(i2c, &isl9305_regmap); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + dev_err(&i2c->dev, "Failed to create regmap: %d\n", ret); + return ret; + } + + config.dev = &i2c->dev; + + for (i = 0; i < ARRAY_SIZE(isl9305_regulators); i++) { + config.of_node = NULL; + config.init_data = NULL; + + if (of_matches) { + config.init_data = of_matches[i].init_data; + config.of_node = of_matches[i].of_node; + } + + if (!config.init_data && pdata) + config.init_data = pdata->init_data[i]; + + rdev = devm_regulator_register(&i2c->dev, + &isl9305_regulators[i], + &config); + if (IS_ERR(rdev)) { + ret = PTR_ERR(rdev); + dev_err(&i2c->dev, "Failed to register %s: %d\n", + isl9305_regulators[i].name, ret); + return ret; + } + } + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id isl9305_dt_ids[] = { + { .compatible = "isl,isl9305" }, + { .compatible = "isl,isl9305h" }, + {}, +}; +#endif + +static const struct i2c_device_id isl9305_i2c_id[] = { + { "isl9305", }, + { "isl9305h", }, + { } +}; +MODULE_DEVICE_TABLE(i2c, isl9305_i2c_id); + +static struct i2c_driver isl9305_regulator_driver = { + .driver = { + .name = "isl9305", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(isl9305_dt_ids), + }, + .probe = isl9305_i2c_probe, + .id_table = isl9305_i2c_id, +}; + +module_i2c_driver(isl9305_regulator_driver); + +MODULE_AUTHOR("Mark Brown"); +MODULE_DESCRIPTION("Intersil ISL9305 DCDC regulator"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h new file mode 100644 index 000000000000..1419133fa69e --- /dev/null +++ b/include/linux/platform_data/isl9305.h @@ -0,0 +1,30 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify 
it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ISL9305_H +#define __ISL9305_H + +#define ISL9305_DCD1 0 +#define ISL9305_DCD2 1 +#define ISL9305_LDO1 2 +#define ISL9305_LDO2 3 + +#define ISL9305_MAX_REGULATOR ISL9305_LDO2 + +struct regulator_init_data; + +struct isl9305_pdata { + struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR + 1]; +}; + +#endif -- cgit v1.2.3 From a8adcc9012d8502e06ba7b3f966bad8f2c58edc3 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Wed, 27 Aug 2014 23:44:08 +0530 Subject: power_supply: Add boot and calibration attributes Usually PMICs come with a coulomb counting mechanism which can be used to implement a fuel gauging solution in software itself. One of the key inputs to these SW fuel gauge solutions is the set of boot-up parameters such as boot voltage and boot current. This patch adds the VOLTAGE_BOOT and CURRENT_BOOT power supply attributes to report bootup voltage and current. This patch also adds the CALIBRATE power supply attribute, which is useful for calibrating the battery/coulomb counter. Signed-off-by: Ramakrishna Pallala Signed-off-by: Sebastian Reichel --- Documentation/power/power_supply_class.txt | 6 ++++++ drivers/power/power_supply_sysfs.c | 3 +++ include/linux/power_supply.h | 5 +++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 48cff881cb8a..82dacc06e355 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -101,6 +101,10 @@ VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that these ones should be used if hardware could only guess (measure and retain) the thresholds of a given power supply. +VOLTAGE_BOOT - Reports the voltage measured during boot + +CURRENT_BOOT - Reports the current measured during boot + CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when battery considered full/empty. @@ -123,6 +127,8 @@ the current drawn from a charging source. CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge condition. +CALIBRATE - battery or coulomb counter calibration status + CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the power supply object.
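To make the intended use of these attributes concrete, a fuel-gauge driver that latched its coulomb-counter readings at boot might report them from its get_property() callback roughly as sketched below; struct my_chip and its fields are invented for illustration and appear nowhere in this patch:

	static int my_get_property(struct power_supply *psy,
				   enum power_supply_property psp,
				   union power_supply_propval *val)
	{
		struct my_chip *chip = container_of(psy, struct my_chip, psy);

		switch (psp) {
		case POWER_SUPPLY_PROP_VOLTAGE_BOOT:
			val->intval = chip->boot_uv;	/* microvolts at boot */
			break;
		case POWER_SUPPLY_PROP_CURRENT_BOOT:
			val->intval = chip->boot_ua;	/* microamps at boot */
			break;
		case POWER_SUPPLY_PROP_CALIBRATE:
			val->intval = chip->calibrating;
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

The is_amp/is_watt helpers extended below classify the new properties alongside the existing current and voltage properties.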
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 750a20275664..7e4726d37211 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -149,9 +149,11 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(voltage_now), POWER_SUPPLY_ATTR(voltage_avg), POWER_SUPPLY_ATTR(voltage_ocv), + POWER_SUPPLY_ATTR(voltage_boot), POWER_SUPPLY_ATTR(current_max), POWER_SUPPLY_ATTR(current_now), POWER_SUPPLY_ATTR(current_avg), + POWER_SUPPLY_ATTR(current_boot), POWER_SUPPLY_ATTR(power_now), POWER_SUPPLY_ATTR(power_avg), POWER_SUPPLY_ATTR(charge_full_design), @@ -193,6 +195,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(type), POWER_SUPPLY_ATTR(scope), POWER_SUPPLY_ATTR(charge_term_current), + POWER_SUPPLY_ATTR(calibrate), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f3dea41dbcd2..de59a28b1b5b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -102,9 +102,11 @@ enum power_supply_property { POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_VOLTAGE_OCV, + POWER_SUPPLY_PROP_VOLTAGE_BOOT, POWER_SUPPLY_PROP_CURRENT_MAX, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, + POWER_SUPPLY_PROP_CURRENT_BOOT, POWER_SUPPLY_PROP_POWER_NOW, POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, @@ -146,6 +148,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */ POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, + POWER_SUPPLY_PROP_CALIBRATE, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, @@ -291,6 +294,7 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CURRENT_MAX: case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_CURRENT_AVG: + case POWER_SUPPLY_PROP_CURRENT_BOOT: return 1; default: break; @@ -315,6 +319,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_VOLTAGE_NOW: case POWER_SUPPLY_PROP_VOLTAGE_AVG: case POWER_SUPPLY_PROP_VOLTAGE_OCV: + case POWER_SUPPLY_PROP_VOLTAGE_BOOT: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: case POWER_SUPPLY_PROP_POWER_NOW: -- cgit v1.2.3 From 521d24ee598bd8a8b71d7ac76ce2c0da0e548406 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Tue, 8 Jul 2014 18:26:18 -0400 Subject: rcu: Return bool type in rcu_lockdep_current_cpu_online() Return true instead of 1 in rcu_lockdep_current_cpu_online() as this function has bool as its return type. Signed-off-by: Pranith Kumar Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..7e47e44bce03 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -349,7 +349,7 @@ bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { - return 1; + return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ -- cgit v1.2.3 From 85b39d305bfe809a11ff2770d380be3e2465beec Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 8 Jul 2014 15:17:59 -0700 Subject: rcu: Uninline rcu_read_lock_held() This commit uninlines rcu_read_lock_held(). According to "size vmlinux" this saves 28549 bytes in .text: - 5541731 3014560 14757888 23314179 + 5513182 3026848 14757888 23297918 Note: it looks as if the data grows by 12288 bytes but this is not true, it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and since .text shrinks the padding grows, and thus .data grows too as it is seen by /bin/size. diff System.map: - ffffffff81510000 D _sdata - ffffffff81510000 D init_thread_union + ffffffff81509000 D _sdata + ffffffff8150c000 D init_thread_union Perhaps we can change vmlinux.lds.S to align .data itself, so that /bin/size can't "wrongly" report that .data grows if .text shrinks. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 36 +----------------------------------- kernel/rcu/update.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7e47e44bce03..321ed0d4e675 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -371,41 +371,7 @@ extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); -/** - * rcu_read_lock_held() - might we be in RCU read-side critical section? - * - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU - * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, - * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. This is useful for debug checks in functions that - * require that they be called within an RCU read-side critical section. - * - * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot - * and while lockdep is disabled. - * - * Note that rcu_read_lock() and the matching rcu_read_unlock() must - * occur in the same context, for example, it is illegal to invoke - * rcu_read_unlock() in process context if the matching rcu_read_lock() - * was invoked from within an irq handler. - * - * Note that rcu_read_lock() is disallowed if the CPU is either idle or - * offline from an RCU perspective, so check for those as well. - */ -static inline int rcu_read_lock_held(void) -{ - if (!debug_lockdep_rcu_enabled()) - return 1; - if (!rcu_is_watching()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; - return lock_is_held(&rcu_lock_map); -} - -/* - * rcu_read_lock_bh_held() is defined out of line to avoid #include-file - * hell.
- */ +int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); /** diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..ea8ea7b16e11 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -136,6 +136,38 @@ int notrace debug_lockdep_rcu_enabled(void) } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); +/** + * rcu_read_lock_held() - might we be in RCU read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. + * + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. + * + * Note that rcu_read_lock() and the matching rcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock() in process context if the matching rcu_read_lock() + * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. + */ +int rcu_read_lock_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + if (!rcu_is_watching()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; + return lock_is_held(&rcu_lock_map); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_held); + /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -- cgit v1.2.3 From eea203fea3484598280a07fe503e025e886297fb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 14 Jul 2014 09:16:15 -0400 Subject: rcu: Use pr_alert/pr_cont for printing logs Use pr_alert/pr_cont for printing the logs from rcutorture module directly instead of writing it to a buffer and then printing it. This saves us from having to allocate such buffers. Also remove a resulting empty function. I tested this using the parse-torture.sh script as follows: $ dmesg | grep torture > log.txt $ bash parse-torture.sh log.txt test $ There were no warnings, which means that parsing went fine. Signed-off-by: Joe Perches Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 2 +- kernel/rcu/rcutorture.c | 127 +++++++++++++++++++++--------------------------- kernel/torture.c | 16 +++--- 3 files changed, 64 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 5ca58fcbaf1b..fec46f8c08eb 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -51,7 +51,7 @@ /* Definitions for online/offline exerciser. */ int torture_onoff_init(long ooholdoff, long oointerval); -char *torture_onoff_stats(char *page); +void torture_onoff_stats(void); bool torture_onoff_failures(void); /* Low-rider random number generator.
*/ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7e67711cbae8..ff4f0c756dee 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -242,7 +242,7 @@ struct rcu_torture_ops { void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); - void (*stats)(char *page); + void (*stats)(void); int irq_capable; int can_boost; const char *name; @@ -525,21 +525,21 @@ static void srcu_torture_barrier(void) srcu_barrier(&srcu_ctl); } -static void srcu_torture_stats(char *page) +static void srcu_torture_stats(void) { int cpu; int idx = srcu_ctl.completed & 0x1; - page += sprintf(page, "%s%s per-CPU(idx=%d):", - torture_type, TORTURE_FLAG, idx); + pr_alert("%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; - page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); + pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } - sprintf(page, "\n"); + pr_cont("\n"); } static void srcu_torture_synchronize_expedited(void) @@ -1031,10 +1031,15 @@ rcu_torture_reader(void *arg) } /* - * Create an RCU-torture statistics message in the specified buffer. + * Print torture statistics. Caller must ensure that there is only + * one call to this function at a given time!!! This is normally + * accomplished by relying on the module system to only have one copy + * of the module loaded, and then by giving the rcu_torture_stats + * kthread full control (or the init/cleanup functions when rcu_torture_stats + * thread is not running). */ static void -rcu_torture_printk(char *page) +rcu_torture_stats_print(void) { int cpu; int i; @@ -1052,55 +1057,60 @@ rcu_torture_printk(char *page) if (pipesummary[i] != 0) break; } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", - rcu_torture_current, - rcu_torture_current_version, - list_empty(&rcu_torture_freelist), - atomic_read(&n_rcu_torture_alloc), - atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free)); - page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", - atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); - page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, - n_rcu_torture_timers); - page = torture_onoff_stats(page); - page += sprintf(page, "barrier: %ld/%ld:%ld", - n_barrier_successes, - n_barrier_attempts, - n_rcu_torture_barrier_error); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", + rcu_torture_current, + rcu_torture_current_version, + list_empty(&rcu_torture_freelist), + atomic_read(&n_rcu_torture_alloc), + atomic_read(&n_rcu_torture_alloc_fail), + atomic_read(&n_rcu_torture_free)); + pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ", + atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror); + pr_cont("rtbf: %ld rtb: %ld nt: %ld ", + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, + n_rcu_torture_timers); + torture_onoff_stats(); + pr_cont("barrier: %ld/%ld:%ld\n", + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || 
n_rcu_torture_barrier_error != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || n_rcu_torture_boost_failure != 0 || i > 1) { - page += sprintf(page, "!!! "); + pr_cont("%s", "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(1); } - page += sprintf(page, "Reader Pipe: "); + pr_cont("Reader Pipe: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", pipesummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Reader Batch: "); + pr_cont(" %ld", pipesummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Reader Batch: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", batchsummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Free-Block Circulation: "); + pr_cont(" %ld", batchsummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Free-Block Circulation: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - page += sprintf(page, " %d", - atomic_read(&rcu_torture_wcount[i])); + pr_cont(" %d", atomic_read(&rcu_torture_wcount[i])); } - page += sprintf(page, "\n"); + pr_cont("\n"); + if (cur_ops->stats) - cur_ops->stats(page); + cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_torture_current != NULL) { int __maybe_unused flags; @@ -1109,40 +1119,15 @@ rcu_torture_printk(char *page) rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); - page += sprintf(page, - "??? Writer stall state %d g%lu c%lu f%#x\n", - rcu_torture_writer_state, - gpnum, completed, flags); + pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); show_rcu_gp_kthreads(); rcutorture_trace_dump(); } rtcv_snap = rcu_torture_current_version; } -/* - * Print torture statistics. Caller must ensure that there is only - * one call to this function at a given time!!! This is normally - * accomplished by relying on the module system to only have one copy - * of the module loaded, and then by giving the rcu_torture_stats - * kthread full control (or the init/cleanup functions when rcu_torture_stats - * thread is not running). - */ -static void -rcu_torture_stats_print(void) -{ - int size = nr_cpu_ids * 200 + 8192; - char *buf; - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) { - pr_err("rcu-torture: Out of memory, need: %d", size); - return; - } - rcu_torture_printk(buf); - pr_alert("%s", buf); - kfree(buf); -} - /* * Periodically prints torture statistics, if periodic statistics printing * was specified via the stat_interval module parameter. diff --git a/kernel/torture.c b/kernel/torture.c index d600af21f022..ede8b25ec1ae 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup); /* * Print online/offline testing statistics. 
*/ -char *torture_onoff_stats(char *page) +void torture_onoff_stats(void) { #ifdef CONFIG_HOTPLUG_CPU - page += sprintf(page, - "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", - n_online_successes, n_online_attempts, - n_offline_successes, n_offline_attempts, - min_online, max_online, - min_offline, max_offline, - sum_online, sum_offline, HZ); + pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ - return page; } EXPORT_SYMBOL_GPL(torture_onoff_stats); -- cgit v1.2.3 From 8315f42295d2667a7f942f154b73a86fd7cb2227 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jun 2014 13:42:20 -0700 Subject: rcu: Add call_rcu_tasks() This commit adds a new RCU-tasks flavor of RCU, which provides call_rcu_tasks(). This RCU flavor's quiescent states are voluntary context switch (not preemption!) and userspace execution (not the idle loop -- use some sort of schedule_on_each_cpu() if you need to handle the idle tasks). Note that unlike other RCU flavors, these quiescent states occur in tasks, not necessarily CPUs. Includes fixes from Steven Rostedt. This RCU flavor is assumed to have very infrequent latency-tolerant updaters. This assumption permits significant simplifications, including a single global callback list protected by a single global lock, along with a single task-private linked list containing all tasks that have not yet passed through a quiescent state. If experience shows this assumption to be incorrect, the required additional complexity will be added. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +++ include/linux/rcupdate.h | 36 ++++++++++ include/linux/sched.h | 23 ++++--- init/Kconfig | 10 +++ kernel/rcu/tiny.c | 2 + kernel/rcu/tree.c | 2 + kernel/rcu/update.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 242 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2bb4c4f3531a..dffd9258ee60 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,6 +117,14 @@ extern struct group_info init_groups; #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif +#ifdef CONFIG_TASKS_RCU +#define INIT_TASK_RCU_TASKS(tsk) \ + .rcu_tasks_holdout = false, \ + .rcu_tasks_holdout_list = \ + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), +#else +#define INIT_TASK_RCU_TASKS(tsk) +#endif extern struct cred init_cred; @@ -224,6 +232,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..3432063f4c87 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -197,6 +197,26 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); +/** + * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed.
call_rcu_tasks() assumes + * that the read-side critical sections end at a voluntary context + * switch (not a preemption!), entry into idle, or transition to usermode + * execution. As such, there are no read-side primitives analogous to + * rcu_read_lock() and rcu_read_unlock() because this primitive is intended + * to determine that all tasks have passed through a safe state, not so + * much for data-structure synchronization. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. + */ +void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); + #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); @@ -294,6 +314,22 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, rcu_irq_exit(); \ } while (0) +/* + * Note a voluntary context switch for RCU-tasks benefit. This is a + * macro rather than an inline function to avoid #include hell. + */ +#ifdef CONFIG_TASKS_RCU +#define rcu_note_voluntary_context_switch(t) \ + do { \ + preempt_disable(); /* Exclude synchronize_sched(); */ \ + if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ + ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + preempt_enable(); \ + } while (0) +#else /* #ifdef CONFIG_TASKS_RCU */ +#define rcu_note_voluntary_context_switch(t) do { } while (0) +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..eaacac4ae77d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1270,6 +1270,11 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; +#endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -2000,28 +2005,24 @@ extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); #ifdef CONFIG_PREEMPT_RCU - #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#endif /* #ifdef CONFIG_PREEMPT_RCU */ static inline void rcu_copy_process(struct task_struct *p) { +#ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; -#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); +#endif /* #ifdef CONFIG_TASKS_RCU */ } -#else - -static inline void rcu_copy_process(struct task_struct *p) -{ -} - -#endif - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { diff --git a/init/Kconfig b/init/Kconfig index e84c6423a2e5..c4539c4e177f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -507,6 +507,16 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
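To make the intended usage concrete before the Kconfig text below: the expected caller is code such as a tracing trampoline manager, which must not free a trampoline while some preempted task could still be executing in it. A hypothetical user of the new primitive (all names here are invented, not part of this series) would look like:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_trampoline {
		struct rcu_head rh;
		/* executable stub and bookkeeping would live here */
	};

	static void my_trampoline_free(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct my_trampoline, rh));
	}

	static void my_trampoline_retire(struct my_trampoline *tp)
	{
		/*
		 * Unlink tp first so no new tasks can begin executing it;
		 * call_rcu_tasks() then defers the free until every task
		 * has passed through a voluntary context switch, usermode
		 * execution, or the idle loop.
		 */
		call_rcu_tasks(&tp->rh, my_trampoline_free);
	}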
+config TASKS_RCU + bool "Task-based RCU implementation using voluntary context switch" + default n + help + This option enables a task-based RCU implementation that uses + only voluntary context switch (not preemption!), idle, and + user-mode execution as quiescent states. + + If unsure, say N. + config RCU_STALL_COMMON def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) help diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d9efcc13008c..717f00854fc0 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -254,6 +254,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + if (user) + rcu_note_voluntary_context_switch(current); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b70cb6fbe3c..8ad91d1e317d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2410,6 +2410,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); + if (user) + rcu_note_voluntary_context_switch(current); trace_rcu_utilization(TPS("End scheduler-tick")); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..19b3dacb0753 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -47,6 +47,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS @@ -347,3 +348,173 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ + +#ifdef CONFIG_TASKS_RCU + +/* + * Simple variant of RCU whose quiescent states are voluntary context switch, + * user-space execution, and idle. As such, grace periods can take one good + * long time. There are no read-side primitives similar to rcu_read_lock() + * and rcu_read_unlock() because this implementation is intended to get + * the system into a safe state for some of the manipulations involved in + * tracing and the like. Finally, this implementation does not support + * high call_rcu_tasks() rates from multiple CPUs. If this is required, + * per-CPU callback lists will be needed. + */ + +/* Global list of callbacks and associated lock. */ +static struct rcu_head *rcu_tasks_cbs_head; +static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; +static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); + +/* Post an RCU-tasks callback. */ +void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +{ + unsigned long flags; + + rhp->next = NULL; + rhp->func = func; + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + *rcu_tasks_cbs_tail = rhp; + rcu_tasks_cbs_tail = &rhp->next; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); +} +EXPORT_SYMBOL_GPL(call_rcu_tasks); + +/* See if the current task has stopped holding out, remove from list if so. */ +static void check_holdout_task(struct task_struct *t) +{ + if (!ACCESS_ONCE(t->rcu_tasks_holdout) || + t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || + !ACCESS_ONCE(t->on_rq)) { + ACCESS_ONCE(t->rcu_tasks_holdout) = false; + list_del_rcu(&t->rcu_tasks_holdout_list); + put_task_struct(t); + } +} + +/* RCU-tasks kthread that detects grace periods and invokes callbacks. */ +static int __noreturn rcu_tasks_kthread(void *arg) +{ + unsigned long flags; + struct task_struct *g, *t; + struct rcu_head *list; + struct rcu_head *next; + LIST_HEAD(rcu_tasks_holdouts); + + /* FIXME: Add housekeeping affinity.
*/ + + /* + * Each pass through the following loop makes one check for + * newly arrived callbacks, and, if there are some, waits for + * one RCU-tasks grace period and then invokes the callbacks. + * This loop is terminated by the system going down. ;-) + */ + for (;;) { + + /* Pick up any new callbacks. */ + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + list = rcu_tasks_cbs_head; + rcu_tasks_cbs_head = NULL; + rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); + + /* If there were none, wait a bit and start over. */ + if (!list) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + continue; + } + + /* + * Wait for all pre-existing t->on_rq and t->nvcsw + * transitions to complete. Invoking synchronize_sched() + * suffices because all these transitions occur with + * interrupts disabled. Without this synchronize_sched(), + * a read-side critical section that started before the + * grace period might be incorrectly seen as having started + * after the grace period. + * + * This synchronize_sched() also dispenses with the + * need for a memory barrier on the first store to + * ->rcu_tasks_holdout, as it forces the store to happen + * after the beginning of the grace period. + */ + synchronize_sched(); + + /* + * There were callbacks, so we need to wait for an + * RCU-tasks grace period. Start off by scanning + * the task list for tasks that are not already + * voluntarily blocked. Mark these tasks and make + * a list of them in rcu_tasks_holdouts. + */ + rcu_read_lock(); + for_each_process_thread(g, t) { + if (t != current && ACCESS_ONCE(t->on_rq) && + !is_idle_task(t)) { + get_task_struct(t); + t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw); + ACCESS_ONCE(t->rcu_tasks_holdout) = true; + list_add(&t->rcu_tasks_holdout_list, + &rcu_tasks_holdouts); + } + } + rcu_read_unlock(); + + /* + * Each pass through the following loop scans the list + * of holdout tasks, removing any that are no longer + * holdouts. When the list is empty, we are done. + */ + while (!list_empty(&rcu_tasks_holdouts)) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + rcu_read_lock(); + list_for_each_entry_rcu(t, &rcu_tasks_holdouts, + rcu_tasks_holdout_list) + check_holdout_task(t); + rcu_read_unlock(); + } + + /* + * Because ->on_rq and ->nvcsw are not guaranteed + * to have full memory barriers prior to them in the + * schedule() path, memory reordering on other CPUs could + * cause their RCU-tasks read-side critical sections to + * extend past the end of the grace period. However, + * because these ->nvcsw updates are carried out with + * interrupts disabled, we can use synchronize_sched() + * to force the needed ordering on all such CPUs. + * + * This synchronize_sched() also confines all + * ->rcu_tasks_holdout accesses to be within the grace + * period, avoiding the need for memory barriers for + * ->rcu_tasks_holdout accesses. + */ + synchronize_sched(); + + /* Invoke the callbacks. */ + while (list) { + next = list->next; + local_bh_disable(); + list->func(list); + local_bh_enable(); + list = next; + cond_resched(); + } + } +} + +/* Spawn rcu_tasks_kthread() at boot time.
*/ +static int __init rcu_spawn_tasks_kthread(void) +{ + struct task_struct __maybe_unused *t; + + t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); + BUG_ON(IS_ERR(t)); + return 0; +} +early_initcall(rcu_spawn_tasks_kthread); + +#endif /* #ifdef CONFIG_TASKS_RCU */ -- cgit v1.2.3 From bde6c3aa993066acb0d6ce32ecabe03b9d5df92d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:26:57 -0700 Subject: rcu: Provide cond_resched_rcu_qs() to force quiescent states in long loops RCU-tasks requires the occasional voluntary context switch from CPU-bound in-kernel tasks. In some cases, this requires instrumenting cond_resched(). However, there is some reluctance to countenance unconditionally instrumenting cond_resched() (see http://lwn.net/Articles/603252/), so this commit creates a separate cond_resched_rcu_qs() that may be used in place of cond_resched() in locations prone to long-duration in-kernel looping. This commit currently instruments only RCU-tasks. Future possibilities include also instrumenting RCU, RCU-bh, and RCU-sched in order to reduce IPI usage. Signed-off-by: Paul E. McKenney --- fs/file.c | 2 +- include/linux/rcupdate.h | 13 +++++++++++++ kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/tree.c | 12 ++++++------ kernel/rcu/tree_plugin.h | 2 +- mm/mlock.c | 2 +- 6 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/file.c b/fs/file.c index 66923fe3176e..1cafc4c9275b 100644 --- a/fs/file.c +++ b/fs/file.c @@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files) struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); - cond_resched(); + cond_resched_rcu_qs(); } } i++; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3432063f4c87..473350462d04 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -330,6 +330,19 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ +/** + * cond_resched_rcu_qs - Report potential quiescent states to RCU + * + * This macro resembles cond_resched(), except that it is defined to + * report potential quiescent states to RCU-tasks even if the cond_resched() + * machinery were to be shut off, as some advocate for PREEMPT kernels. 
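+ *
+ * As an invented example of the intended use (not from this patch), a
+ * CPU-bound in-kernel loop simply substitutes this for cond_resched():
+ *
+ *	for (i = 0; i < nr_items; i++) {
+ *		process_item(&items[i]);
+ *		cond_resched_rcu_qs();
+ *	}
+ *
+ * where process_item() stands in for arbitrary long-running work.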
+ */ +#define cond_resched_rcu_qs() \ +do { \ + rcu_note_voluntary_context_switch(current); \ + cond_resched(); \ +} while (0) + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 948a7693748e..178716713e11 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -667,7 +667,7 @@ static int rcu_torture_boost(void *arg) } call_rcu_time = jiffies; } - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_boost"); if (torture_must_stop()) goto checkwait; @@ -1019,7 +1019,7 @@ rcu_torture_reader(void *arg) __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); if (irqreader && cur_ops->irq_capable) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ad91d1e317d..e23dad0661e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1647,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } mutex_unlock(&rsp->onoff_mutex); @@ -1736,7 +1736,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) /* smp_mb() provided by prior unlock-lock pair. */ nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1785,7 +1785,7 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Locking provides needed memory barrier. */ if (rcu_gp_init(rsp)) break; - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1828,10 +1828,10 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqsend")); - cond_resched(); + cond_resched_rcu_qs(); } else { /* Deal with stray signal. */ - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -2434,7 +2434,7 @@ static void force_qs_rnp(struct rcu_state *rsp, struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { - cond_resched(); + cond_resched_rcu_qs(); mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a7997e272564..7672586d3920 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1848,7 +1848,7 @@ static int rcu_oom_notify(struct notifier_block *self, get_online_cpus(); for_each_online_cpu(cpu) { smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); - cond_resched(); + cond_resched_rcu_qs(); } put_online_cpus(); diff --git a/mm/mlock.c b/mm/mlock.c index ce84cb0b83ef..ab3150c26711 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -789,7 +789,7 @@ static int do_mlockall(int flags) /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); - cond_resched(); + cond_resched_rcu_qs(); } out: return 0; -- cgit v1.2.3 From 53c6d4edf874d3cbc031a53738c6cba9277faea5 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 1 Jul 2014 12:22:23 -0700 Subject: rcu: Add synchronous grace-period waiting for RCU-tasks It turns out to be easier to add the synchronous grace-period waiting functions to RCU-tasks than to work around their absense in rcutorture, so this commit adds them. The key point is that the existence of call_rcu_tasks() means that rcutorture needs an rcu_barrier_tasks(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ kernel/rcu/update.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 473350462d04..640152fedcde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -216,6 +216,8 @@ void synchronize_sched(void); * memory ordering guarantees. */ void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void synchronize_rcu_tasks(void); +void rcu_barrier_tasks(void); #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 19b3dacb0753..5fd1ddbfcc55 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -381,6 +381,61 @@ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) } EXPORT_SYMBOL_GPL(call_rcu_tasks); +/** + * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-tasks + * grace period has elapsed, in other words after all currently + * executing rcu-tasks read-side critical sections have elapsed. These + * read-side critical sections are delimited by calls to schedule(), + * cond_resched_rcu_qs(), idle execution, userspace execution, calls + * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). + * + * This is a very specialized primitive, intended only for a few uses in + * tracing and other situations requiring manipulation of function + * preambles and profiling hooks. The synchronize_rcu_tasks() function + * is not (yet) intended for heavy use from multiple CPUs. + * + * Note that this guarantee implies further memory-ordering guarantees. + * On systems with more than one CPU, when synchronize_rcu_tasks() returns, + * each CPU is guaranteed to have executed a full memory barrier since the + * end of its last RCU-tasks read-side critical section whose beginning + * preceded the call to synchronize_rcu_tasks(). In addition, each CPU + * having an RCU-tasks read-side critical section that extends beyond + * the return from synchronize_rcu_tasks() is guaranteed to have executed + * a full memory barrier after the beginning of synchronize_rcu_tasks() + * and before the beginning of that RCU-tasks read-side critical section. + * Note that these guarantees include CPUs that are offline, idle, or + * executing in user mode, as well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU + * (but again only if the system has more than one CPU). + */ +void synchronize_rcu_tasks(void) +{ + /* Complain if the scheduler has not started. */ + rcu_lockdep_assert(!rcu_scheduler_active, + "synchronize_rcu_tasks called too soon"); + + /* Wait for the grace period. 
*/ + wait_rcu_gp(call_rcu_tasks); +} + +/** + * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks. + * + * Although the current implementation is guaranteed to wait, it is not + * obligated to, for example, if there are no pending callbacks. + */ +void rcu_barrier_tasks(void) +{ + /* There is only one callback queue, so this is easy. ;-) */ + synchronize_rcu_tasks(); +} + /* See if the current task has stopped holding out, remove from list if so. */ static void check_holdout_task(struct task_struct *t) { -- cgit v1.2.3 From 3f95aa81d265223fdb13ea2b59883766a05adbdf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 06:10:23 -0700 Subject: rcu: Make TASKS_RCU handle tasks that are almost done exiting Once a task has passed exit_notify() in the do_exit() code path, it is no longer on the task lists, and is therefore no longer visible to rcu_tasks_kthread(). This means that an almost-exited task might be preempted while within a trampoline, and this task won't be waited on by rcu_tasks_kthread(). This commit fixes this bug by adding an srcu_struct. An exiting task does srcu_read_lock() just before calling exit_notify(), and does the corresponding srcu_read_unlock() after doing the final preempt_disable(). This means that rcu_tasks_kthread() can do synchronize_srcu() to wait for all mostly-exited tasks to reach their final preempt_disable() region, and then use synchronize_sched() to wait for those tasks to finish exiting. Reported-by: Oleg Nesterov Suggested-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ kernel/exit.c | 3 +++ kernel/rcu/update.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 640152fedcde..54b2ebb20313 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, * macro rather than an inline function to avoid #include hell. 
*/ #ifdef CONFIG_TASKS_RCU +#define TASKS_RCU(x) x +extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ preempt_disable(); /* Exclude synchronize_sched(); */ \ @@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ +#define TASKS_RCU(x) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ diff --git a/kernel/exit.c b/kernel/exit.c index 32c58f7433a3..d13f2eec4bb8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -667,6 +667,7 @@ void do_exit(long code) { struct task_struct *tsk = current; int group_dead; + TASKS_RCU(int tasks_rcu_i); profile_task_exit(tsk); @@ -775,6 +776,7 @@ void do_exit(long code) */ flush_ptrace_hw_breakpoint(tsk); + TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); exit_notify(tsk, group_dead); proc_exit_connector(tsk); #ifdef CONFIG_NUMA @@ -814,6 +816,7 @@ void do_exit(long code) if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); + TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); /* * The setting of TASK_RUNNING by try_to_wake_up() may be delayed diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5fd1ddbfcc55..403fc4ae539e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head; static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); +/* Track exiting tasks in order to allow them to be waited for. */ +DEFINE_SRCU(tasks_rcu_exit_srcu); + +/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ +static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3; +module_param(rcu_task_stall_timeout, int, 0644); + /* Post an RCU-tasks callback. */ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) { @@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg) } rcu_read_unlock(); + /* + * Wait for tasks that are in the process of exiting. + * This does only part of the job, ensuring that all + * tasks that were previously exiting reach the point + * where they have disabled preemption, allowing the + * later synchronize_sched() to finish the job. + */ + synchronize_srcu(&tasks_rcu_exit_srcu); + /* * Each pass through the following loop scans the list * of holdout tasks, removing any that are no longer @@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg) * ->rcu_tasks_holdout accesses to be within the grace * period, avoiding the need for memory barriers for * ->rcu_tasks_holdout accesses. + * + * In addition, this synchronize_sched() waits for exiting + * tasks to complete their final preempt_disable() region + * of execution, cleaning up after the synchronize_srcu() + * above. */ synchronize_sched(); -- cgit v1.2.3 From 69c604557ce34015629b325b85ff1a4996038a3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:59:36 -0700 Subject: rcutorture: Add torture tests for RCU-tasks This commit adds torture tests for RCU-tasks. It also fixes a bug that would segfault for an RCU flavor lacking a callback-barrier function. Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + kernel/rcu/rcutorture.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 54b2ebb20313..a3123f53a4ce 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -55,6 +55,7 @@ enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, RCU_SCHED_FLAVOR, + RCU_TASKS_FLAVOR, SRCU_FLAVOR, INVALID_RCU_FLAVOR }; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 178716713e11..75b1abf78c48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -601,6 +601,52 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; +#ifdef CONFIG_TASKS_RCU + +/* + * Definitions for RCU-tasks torture testing. + */ + +static int tasks_torture_read_lock(void) +{ + return 0; +} + +static void tasks_torture_read_unlock(int idx) +{ +} + +static void rcu_tasks_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb); +} + +static struct rcu_torture_ops tasks_ops = { + .ttype = RCU_TASKS_FLAVOR, + .init = rcu_sync_torture_init, + .readlock = tasks_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = tasks_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_tasks_torture_deferred_free, + .sync = synchronize_rcu_tasks, + .exp_sync = synchronize_rcu_tasks, + .call = call_rcu_tasks, + .cb_barrier = rcu_barrier_tasks, + .fqs = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "tasks" +}; + +#define RCUTORTURE_TASKS_OPS &tasks_ops, + +#else /* #ifdef CONFIG_TASKS_RCU */ + +#define RCUTORTURE_TASKS_OPS + +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + /* * RCU torture priority-boost testing. Runs one real-time thread per * CPU for moderate bursts, repeatedly registering RCU callbacks and @@ -1295,7 +1341,8 @@ static int rcu_torture_barrier_cbs(void *arg) if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); } while (!torture_must_stop()); - cur_ops->cb_barrier(); + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); destroy_rcu_head_on_stack(&rcu); torture_kthread_stopping("rcu_torture_barrier_cbs"); return 0; @@ -1534,6 +1581,7 @@ rcu_torture_init(void) int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, + RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) -- cgit v1.2.3 From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 17:43:50 -0700 Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs Currently TASKS_RCU would ignore a CPU running a task in nohz_full= usermode execution. There would be neither a context switch nor a scheduling-clock interrupt to tell TASKS_RCU that the task in question had passed through a quiescent state. The grace period would therefore extend indefinitely. This commit therefore makes RCU's dyntick-idle subsystem record the task_struct structure of the task that is running in dyntick-idle mode on each CPU. The TASKS_RCU grace period can then access this information and record a quiescent state on behalf of any CPU running in dyntick-idle usermode. Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 3 ++- include/linux/sched.h | 2 ++ kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 2 ++ kernel/rcu/tree_plugin.h | 16 ++++++++++++++++ kernel/rcu/update.c | 4 +++- 6 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dffd9258ee60..03b274873b06 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,7 +121,8 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_TASKS(tsk) \ .rcu_tasks_holdout = false, \ .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ + .rcu_tasks_idle_cpu = -1, #else #define INIT_TASK_RCU_TASKS(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index eaacac4ae77d..ec8b34722bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1274,6 +1274,7 @@ struct task_struct { unsigned long rcu_tasks_nvcsw; bool rcu_tasks_holdout; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -2020,6 +2021,7 @@ static inline void rcu_copy_process(struct task_struct *p) #ifdef CONFIG_TASKS_RCU p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; #endif /* #ifdef CONFIG_TASKS_RCU */ } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e23dad0661e2..c880f5387b1f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + rcu_dynticks_task_enter(); /* * It is illegal to enter an extended quiescent state while @@ -642,6 +643,7 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { + rcu_dynticks_task_exit(); smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..3a92000c354f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -605,6 +605,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, static void rcu_bind_gp_kthread(void); static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); +static void rcu_dynticks_task_enter(void); +static void rcu_dynticks_task_exit(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7672586d3920..e466b40052a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -3036,3 +3036,19 @@ static void rcu_bind_gp_kthread(void) housekeeping_affine(current); #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } + +/* Record the current task on dyntick-idle entry. */ +static void rcu_dynticks_task_enter(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} + +/* Record no current task on dyntick-idle exit. 
*/ +static void rcu_dynticks_task_exit(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e1d71741958f..2658de4a5975 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -463,7 +463,9 @@ static void check_holdout_task(struct task_struct *t, { if (!ACCESS_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || - !ACCESS_ONCE(t->on_rq)) { + !ACCESS_ONCE(t->on_rq) || + (IS_ENABLED(CONFIG_NO_HZ_FULL) && + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { ACCESS_ONCE(t->rcu_tasks_holdout) = false; list_del_rcu(&t->rcu_tasks_holdout_list); put_task_struct(t); -- cgit v1.2.3 From 01a81330344b09028881c953a51d1106a9e63518 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Aug 2014 05:23:35 -0700 Subject: rcu: Remove redundant preempt_disable() from rcu_note_voluntary_context_switch() In theory, synchronize_sched() requires a read-side critical section to order against. In practice, preemption can be thought of as being disabled across every machine instruction, at least for those machine instructions that are not in the idle loop and not on offline CPUs. So this commit removes the redundant preempt_disable() from rcu_note_voluntary_context_switch(). Please note that the single instruction in question is the store of zero to ->rcu_tasks_holdout. The "if" is simply a performance optimization that avoids unnecessary stores. To see this, keep in mind that both the "if" condition and the store are in a quiescent state. Therefore, even if the task is preempted for a full grace period (presumably due to its having done a context switch beforehand), the store will be recording a legitimate quiescent state. Reported-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Conflicts: include/linux/rcupdate.h --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a3123f53a4ce..132e1e34cdca 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -326,10 +326,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ - preempt_disable(); /* Exclude synchronize_sched(); */ \ if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ - preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -- cgit v1.2.3 From 1d082fd061884a587c490c4fc8a2056ce1e47624 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:01:53 -0700 Subject: rcu: Remove local_irq_disable() in rcu_preempt_note_context_switch() The rcu_preempt_note_context_switch() function is on a scheduling fast path, so it would be good to avoid disabling irqs. The reason that irqs are disabled is to synchronize process-level and irq-handler access to the task_struct ->rcu_read_unlock_special bitmask. This commit therefore makes ->rcu_read_unlock_special instead be a union of bools with a short allowing single-access checks in RCU's __rcu_read_unlock(). This results in the process-level and irq-handler accesses being simple loads and stores, so that irqs need no longer be disabled. This commit therefore removes the irq disabling from rcu_preempt_note_context_switch(). 
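A minimal stand-alone C sketch of the union layout described above (illustrative only: the demo_* names and the main() harness are not from the patch, and it assumes, as the kernel code does, that the two bool flags pack into the same storage as the short):

#include <stdbool.h>
#include <stdio.h>

union rcu_special_demo {
	struct {
		bool blocked;	/* task blocked in an RCU read-side section */
		bool need_qs;	/* RCU core wants a quiescent state */
	} b;
	short s;		/* one plain load/store covers both flags */
};

int main(void)
{
	union rcu_special_demo special = { .s = 0 };

	/* An irq handler can set one flag with a simple byte store... */
	special.b.need_qs = true;

	/* ...and the unlock fast path can test both flags at once with a
	 * single load of the overlapping short, with no irq disabling. */
	if (special.s != 0)
		printf("deferred RCU-unlock work pending\n");

	return 0;
}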
Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 2 +- include/linux/sched.h | 16 +++++++++------- kernel/rcu/tree_plugin.h | 32 +++++++++++++++----------------- kernel/rcu/update.c | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 03b274873b06..77fc43f8fb72 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,7 +111,7 @@ extern struct group_info init_groups; #ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ - .rcu_read_unlock_special = 0, \ + .rcu_read_unlock_special.s = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ INIT_TASK_RCU_TREE_PREEMPT() #else diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8b34722bcc..42888d715fb1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1212,6 +1212,13 @@ struct sched_dl_entity { struct hrtimer dl_timer; }; +union rcu_special { + struct { + bool blocked; + bool need_qs; + } b; + short s; +}; struct rcu_node; enum perf_event_task_context { @@ -1264,7 +1271,7 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; - char rcu_read_unlock_special; + union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU @@ -2005,16 +2012,11 @@ extern void task_clear_jobctl_trapping(struct task_struct *task); extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); -#ifdef CONFIG_PREEMPT_RCU -#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special = 0; + p->rcu_read_unlock_special.s = 0; p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); #endif /* #ifdef CONFIG_PREEMPT_RCU */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e466b40052a7..0981c0cd70fe 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -155,9 +155,8 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. * - * Unlike the other rcu_*_qs() functions, callers to this function - * must disable irqs in order to protect the assignment to - * ->rcu_read_unlock_special. + * As with the other rcu_*_qs() functions, callers to this function + * must disable preemption. */ static void rcu_preempt_qs(int cpu) { @@ -166,7 +165,7 @@ static void rcu_preempt_qs(int cpu) if (rdp->passed_quiesce == 0) trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); rdp->passed_quiesce = 1; - current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + current->rcu_read_unlock_special.b.need_qs = false; } /* @@ -190,14 +189,14 @@ static void rcu_preempt_note_context_switch(int cpu) struct rcu_node *rnp; if (t->rcu_read_lock_nesting > 0 && - (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. 
*/ rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; /* @@ -239,7 +238,7 @@ static void rcu_preempt_note_context_switch(int cpu) : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && - t->rcu_read_unlock_special) { + t->rcu_read_unlock_special.s) { /* * Complete exit from RCU read-side critical section on @@ -257,9 +256,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - local_irq_save(flags); rcu_preempt_qs(cpu); - local_irq_restore(flags); } /* @@ -340,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool drop_boost_mutex = false; #endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; - int special; + union rcu_special special; /* NMI handlers cannot block and cannot safely manipulate state. */ if (in_nmi()) @@ -350,12 +347,13 @@ void rcu_read_unlock_special(struct task_struct *t) /* * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. + * let it know that we have done so. Because irqs are disabled, + * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; - if (special & RCU_READ_UNLOCK_NEED_QS) { + if (special.b.need_qs) { rcu_preempt_qs(smp_processor_id()); - if (!t->rcu_read_unlock_special) { + if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; } @@ -368,8 +366,8 @@ void rcu_read_unlock_special(struct task_struct *t) } /* Clean up if blocked during RCU read-side critical section. */ - if (special & RCU_READ_UNLOCK_BLOCKED) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + if (special.b.blocked) { + t->rcu_read_unlock_special.b.blocked = false; /* * Remove this task from the list it blocked on. The @@ -658,7 +656,7 @@ static void rcu_preempt_check_callbacks(int cpu) } if (t->rcu_read_lock_nesting > 0 && per_cpu(rcu_preempt_data, cpu).qs_pending) - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + t->rcu_read_unlock_special.b.need_qs = true; } #ifdef CONFIG_RCU_BOOST @@ -941,7 +939,7 @@ void exit_rcu(void) return; t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; __rcu_read_unlock(); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9487b4898e51..6fb911558562 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -93,7 +93,7 @@ void __rcu_read_unlock(void) barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; -- cgit v1.2.3 From 284a8c93af47306beed967a303d84730b32bab39 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:38:46 -0700 Subject: rcu: Per-CPU operation cleanups to rcu_*_qs() functions The rcu_bh_qs(), rcu_preempt_qs(), and rcu_sched_qs() functions use old-style per-CPU variable access and write to ->passed_quiesce even if it is already set. 
This commit therefore updates to use the new-style per-CPU variable access functions and avoids the spurious writes. This commit also eliminates the "cpu" argument to these functions because they are always invoked on the indicated CPU. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 2 +- kernel/rcu/tiny.c | 10 +++++----- kernel/rcu/tree.c | 34 ++++++++++++++++++---------------- kernel/rcu/tree_plugin.h | 27 +++++++++++++++------------ kernel/softirq.c | 2 +- 6 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 132e1e34cdca..2fab0e37afe0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -261,8 +261,8 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); +void rcu_sched_qs(void); +void rcu_bh_qs(void); void rcu_check_callbacks(int cpu, int user); struct notifier_block; void rcu_idle_enter(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d40a6a451330..38cc5b1e252d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head, static inline void rcu_note_context_switch(int cpu) { - rcu_sched_qs(cpu); + rcu_sched_qs(); } /* diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 717f00854fc0..61b8d2ccc2cb 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval) current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } - rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + rcu_sched_qs(); /* implies rcu_bh_inc() */ barrier(); rcu_dynticks_nesting = newval; } @@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) * are at it, given that any rcu quiescent state is also an rcu_bh * quiescent state. Use "+" instead of "||" to defeat short circuiting. */ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { unsigned long flags; @@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu) /* * Record an rcu_bh quiescent state. */ -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { unsigned long flags; @@ -251,9 +251,9 @@ void rcu_check_callbacks(int cpu, int user) { RCU_TRACE(check_cpu_stalls()); if (user || rcu_is_cpu_rrupt_from_idle()) - rcu_sched_qs(cpu); + rcu_sched_qs(); else if (!in_softirq()) - rcu_bh_qs(cpu); + rcu_bh_qs(); if (user) rcu_note_voluntary_context_switch(current); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c880f5387b1f..4c340625ffd4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -188,22 +188,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) * one since the start of the grace period, this just sets a flag. * The caller must have disabled preemption. 
*/ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_sched"), + __this_cpu_read(rcu_sched_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_sched_data.passed_quiesce, 1); + } } -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_bh"), + __this_cpu_read(rcu_bh_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_bh_data.passed_quiesce, 1); + } } static DEFINE_PER_CPU(int, rcu_sched_qs_mask); @@ -278,7 +280,7 @@ static void rcu_momentary_dyntick_idle(void) void rcu_note_context_switch(int cpu) { trace_rcu_utilization(TPS("Start context switch")); - rcu_sched_qs(cpu); + rcu_sched_qs(); rcu_preempt_note_context_switch(cpu); if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); @@ -2395,8 +2397,8 @@ void rcu_check_callbacks(int cpu, int user) * at least not while the corresponding CPU is online. */ - rcu_sched_qs(cpu); - rcu_bh_qs(cpu); + rcu_sched_qs(); + rcu_bh_qs(); } else if (!in_softirq()) { @@ -2407,7 +2409,7 @@ void rcu_check_callbacks(int cpu, int user) * critical section, so note it. */ - rcu_bh_qs(cpu); + rcu_bh_qs(); } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0981c0cd70fe..25e692a36280 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -158,14 +158,16 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * As with the other rcu_*_qs() functions, callers to this function * must disable preemption. */ -static void rcu_preempt_qs(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; - current->rcu_read_unlock_special.b.need_qs = false; +static void rcu_preempt_qs(void) +{ + if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_preempt"), + __this_cpu_read(rcu_preempt_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ + current->rcu_read_unlock_special.b.need_qs = false; + } } /* @@ -256,7 +258,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. 
*/ - rcu_preempt_qs(cpu); + rcu_preempt_qs(); } /* @@ -352,7 +354,7 @@ void rcu_read_unlock_special(struct task_struct *t) */ special = t->rcu_read_unlock_special; if (special.b.need_qs) { - rcu_preempt_qs(smp_processor_id()); + rcu_preempt_qs(); if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; @@ -651,11 +653,12 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - rcu_preempt_qs(cpu); + rcu_preempt_qs(); return; } if (t->rcu_read_lock_nesting > 0 && - per_cpu(rcu_preempt_data, cpu).qs_pending) + per_cpu(rcu_preempt_data, cpu).qs_pending && + !per_cpu(rcu_preempt_data, cpu).passed_quiesce) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 5918d227730f..348ec763b104 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -278,7 +278,7 @@ restart: pending >>= softirq_bit; } - rcu_bh_qs(smp_processor_id()); + rcu_bh_qs(); local_irq_disable(); pending = local_softirq_pending(); -- cgit v1.2.3 From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. 
Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- arch/x86/kvm/mmu.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 2 +- fs/ext2/super.c | 6 +++--- fs/ext3/super.c | 6 +++--- fs/ext4/super.c | 14 +++++++++----- fs/file_table.c | 2 +- fs/quota/dquot.c | 2 +- fs/super.c | 3 ++- include/linux/percpu_counter.h | 10 ++++++---- include/net/dst_ops.h | 2 +- include/net/inet_frag.h | 2 +- lib/flex_proportions.c | 4 ++-- lib/percpu_counter.c | 4 ++-- lib/proportions.c | 6 +++--- mm/backing-dev.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_memcontrol.c | 2 +- net/sctp/protocol.c | 2 +- 23 files changed, 49 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..5bd53f206f4f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto nomem; register_shrinker(&mmu_shrinker); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2aa..61dae01788d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0); + ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_bdi; @@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)); - ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); + ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_dirty_metadata_bytes; } - ret = percpu_counter_init(&fs_info->bio_counter, 0); + ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_delalloc_bytes; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f9..94ec71eda86b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - ret = percpu_counter_init(&found->total_bytes_pinned, 0); + ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); if (ret) { kfree(found); return ret; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c230..170dc41e8bf4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { ext2_msg(sb, 
KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..eba021b88440 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount2; } err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext3_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext3_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_count_dirs(sb), GFP_KERNEL); } if (err) { ext3_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..e25ca8fdde7d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Register extent status tree shrinker */ ext4_es_register_shrinker(sbi); - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -4105,17 +4106,20 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - err = percpu_counter_init(&sbi->s_freeclusters_counter, block); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); } if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + ext4_count_dirs(sb), GFP_KERNEL); if (!err) - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512a..0bab12b20460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f1..8b663b2d9562 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2725,7 +2725,7 @@ static int __init dquot_init(void) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { - ret = percpu_counter_init(&dqstats.counter[i], 0); + ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } diff --git a/fs/super.c b/fs/super.c index b9a214d2fe98..1b836107acee 100644 --- a/fs/super.c +++ b/fs/super.c @@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + if (percpu_counter_init(&s->s_writers.counter[i], 0, + GFP_KERNEL) < 0) goto fail; lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], &type->s_writers_key[i], 0); diff --git a/include/linux/percpu_counter.h 
b/include/linux/percpu_counter.h index d5dd4657c8d6..50e50095c8d1 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP @@ -26,14 +27,14 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); -#define percpu_counter_init(fbc, value) \ +#define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, &__key); \ + __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); @@ -89,7 +90,8 @@ struct percpu_counter { s64 count; }; -static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) +static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, + gfp_t gfp) { fbc->count = amount; return 0; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 2f26dfb8450e..1f99a1de0e4f 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val) static inline int dst_entries_init(struct dst_ops *dst) { - return percpu_counter_init(&dst->pcpuc_entries, 0); + return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 65a8855e99fe..8d1765577acc 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void init_frag_mem_limit(struct netns_frags *nf) { - percpu_counter_init(&nf->mem, 0); + percpu_counter_init(&nf->mem, 0, GFP_KERNEL); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index ebf3bac460b0..b9d026bfcf38 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p) p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, GFP_KERNEL); if (err) return err; seqcount_init(&p->sequence); @@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); if (err) return err; pl->period = 0; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 3fde78275cd1..48144cdae819 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { unsigned long flags __maybe_unused; @@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b56..ca95f8d54384 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -193,7 +193,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl) raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, GFP_KERNEL); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f0..f19a818be2d3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b99..d7ec93e25fa1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3196,7 +3196,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index a881d9673c6b..bd1808e194a7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -539,7 +539,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } diff --git a/mm/shmem.c b/mm/shmem.c index 0e5fb225007c..d4bc55d3f107 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0)) + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; diff --git a/net/dccp/proto.c 
b/net/dccp/proto.c index de2c1e719305..e421eddf67b4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..d59c2604c247 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3188,8 +3188,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af522622fad..1d191357bf88 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b95..f00a85a3fddd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; -- cgit v1.2.3 From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: proportions: add @gfp to init functions Percpu allocator now supports allocation mask. Add @gfp to [flex_]proportions init functions so that !GFP_KERNEL allocation masks can be used with them too. This patch doesn't make any functional difference. 
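A rough sketch of what the new @gfp argument buys callers (the demo_* structure and function are hypothetical; only the [f]prop init/destroy signatures come from this series): a flex_proportions counter can now be set up from a context that must not sleep by passing GFP_NOWAIT instead of GFP_KERNEL.

#include <linux/flex_proportions.h>
#include <linux/gfp.h>

struct demo_writeback_state {		/* hypothetical embedding */
	struct fprop_global completions;
	struct fprop_local_percpu done;
};

static int demo_state_init(struct demo_writeback_state *st)
{
	int err;

	/* GFP_KERNEL is still fine where sleeping is allowed... */
	err = fprop_global_init(&st->completions, GFP_KERNEL);
	if (err)
		return err;

	/* ...while e.g. GFP_NOWAIT now works where the caller cannot sleep. */
	err = fprop_local_init_percpu(&st->done, GFP_NOWAIT);
	if (err)
		fprop_global_destroy(&st->completions);

	return err;
}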
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Peter Zijlstra --- include/linux/flex_proportions.h | 5 +++-- include/linux/proportions.h | 5 +++-- lib/flex_proportions.c | 8 ++++---- lib/proportions.c | 10 +++++----- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 4ebc49fae391..0d348e011a6e 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * When maximum proportion of some event type is specified, this is the @@ -32,7 +33,7 @@ struct fprop_global { seqcount_t sequence; }; -int fprop_global_init(struct fprop_global *p); +int fprop_global_init(struct fprop_global *p, gfp_t gfp); void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); @@ -79,7 +80,7 @@ struct fprop_local_percpu { raw_spinlock_t lock; /* Protect period and numerator */ }; -int fprop_local_init_percpu(struct fprop_local_percpu *pl); +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 26a8a4ed9b07..00e8e8fa7358 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -12,6 +12,7 @@ #include #include #include +#include struct prop_global { /* @@ -40,7 +41,7 @@ struct prop_descriptor { struct mutex mutex; /* serialize the prop_global switch */ }; -int prop_descriptor_init(struct prop_descriptor *pd, int shift); +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); void prop_change_shift(struct prop_descriptor *pd, int new_shift); /* @@ -61,7 +62,7 @@ struct prop_local_percpu { raw_spinlock_t lock; /* protect the snapshot state */ }; -int prop_local_init_percpu(struct prop_local_percpu *pl); +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); void prop_local_destroy_percpu(struct prop_local_percpu *pl); void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index b9d026bfcf38..8f25652f40d4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1, GFP_KERNEL); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/proportions.c b/lib/proportions.c index ca95f8d54384..6f724298f67a 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include #include -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0, GFP_KERNEL); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f19a818be2d3..64ec49d1772b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = fprop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL); if (err) { err: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d73ef1744d..508599403721 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1777,7 +1777,7 @@ void __init page_writeback_init(void) writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); - fprop_global_init(&writeout_completions); + fprop_global_init(&writeout_completions, GFP_KERNEL); } /** -- cgit v1.2.3 From a34375ef9e65340a138fc0be287de5c940d260fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: percpu-refcount: add @gfp to percpu_ref_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_ref_init() so that !GFP_KERNEL allocation masks can be used with percpu_refs too. This patch doesn't make any functional difference. v2: blk-mq conversion was missing. Updated. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Benjamin LaHaise Cc: Li Zefan Cc: Nicholas A. 
Bellinger Cc: Jens Axboe --- block/blk-mq.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- fs/aio.c | 4 ++-- include/linux/percpu-refcount.h | 3 ++- kernel/cgroup.c | 6 +++--- lib/percpu-refcount.c | 6 ++++-- 6 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5189cb1e478a..702df07b980d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1776,7 +1776,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, + GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index fddfae61222f..4ab6da338585 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -819,7 +819,8 @@ int core_tpg_add_lun( { int ret; - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, + GFP_KERNEL); if (ret < 0) return ret; diff --git a/fs/aio.c b/fs/aio.c index bd7ec2cc2674..93fbcc0f5696 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -666,10 +666,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); - if (percpu_ref_init(&ctx->users, free_ioctx_users)) + if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL)) goto err; - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL)) goto err; ctx->cpu = alloc_percpu(struct kioctx_cpu); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ee8325122dbd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -49,6 +49,7 @@ #include #include #include +#include struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); @@ -66,7 +67,7 @@ struct percpu_ref { }; int __must_check percpu_ref_init(struct percpu_ref *ref, - percpu_ref_func_t *release); + percpu_ref_func_t *release, gfp_t gfp); void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7dc8788cfd52..589b4d89a0a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1628,7 +1628,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out; @@ -4487,7 +4487,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, init_and_link_css(css, ss, cgrp); - err = percpu_ref_init(&css->refcnt, css_release); + err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL); if (err) goto err_free_css; @@ -4555,7 +4555,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } - ret = percpu_ref_init(&cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out_free_cgrp; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..ff9903264a91 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -40,6 +40,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * 
percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @gfp: allocation mask to use * * Initializes the refcount in single atomic counter mode with a refcount of 1; * analagous to atomic_set(ref, 1). @@ -47,11 +48,12 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). */ -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + gfp_t gfp) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp); if (!ref->pcpu_count_ptr) return -ENOMEM; -- cgit v1.2.3 From e78c3496790ee8a36522a838b59b388e8a709e65 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 16 Aug 2014 13:40:10 -0400 Subject: time, signal: Protect resource use statistics with seqlock Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability issues on large systems, due to both functions being serialized with a lock. The lock protects against reporting a wrong value, due to a thread in the task group exiting, its statistics reporting up to the signal struct, and that exited task's statistics being counted twice (or not at all). Protecting that with a lock results in times() and clock_gettime() being completely serialized on large systems. This can be fixed by using a seqlock around the events that gather and propagate statistics. As an additional benefit, the protection code can be moved into thread_group_cputime(), slightly simplifying the calling functions. In the case of posix_cpu_clock_get_task() things can be simplified a lot, because the calling function already ensures that the task sticks around, and the rest is now taken care of in thread_group_cputime(). This way the statistics reporting code can run lockless. Signed-off-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Cc: Alex Thorlton Cc: Andrew Morton Cc: Daeseok Youn Cc: David Rientjes Cc: Dongsheng Yang Cc: Geert Uytterhoeven Cc: Guillaume Morin Cc: Ionut Alexa Cc: Kees Cook Cc: Linus Torvalds Cc: Li Zefan Cc: Michal Hocko Cc: Michal Schmidt Cc: Oleg Nesterov Cc: Vladimir Davydov Cc: umgwanakikbuti@gmail.com Cc: fweisbec@gmail.com Cc: srao@redhat.com Cc: lwoodman@redhat.com Cc: atheurer@redhat.com Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/exit.c | 4 ++++ kernel/fork.c | 1 + kernel/sched/cputime.c | 33 ++++++++++++++++++++------------- kernel/sys.c | 2 -- kernel/time/posix-cpu-timers.c | 14 -------------- 6 files changed, 26 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..dd9eb4807389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -645,6 +645,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ + seqlock_t stats_lock; cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; diff --git a/kernel/exit.c b/kernel/exit.c index b93d46dab6fc..fa09b86609db 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -127,6 +127,7 @@ static void __exit_signal(struct task_struct *tsk) * the signal_struct. 
*/ task_cputime(tsk, &utime, &stime); + write_seqlock(&sig->stats_lock); sig->utime += utime; sig->stime += stime; sig->gtime += task_gtime(tsk); @@ -140,6 +141,7 @@ static void __exit_signal(struct task_struct *tsk) sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->nr_threads--; __unhash_process(tsk, group_dead); + write_sequnlock(&sig->stats_lock); /* * Do this under ->siglock, we can race with another thread @@ -1042,6 +1044,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; + write_seqlock(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; @@ -1064,6 +1067,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); + write_sequnlock(&psig->stats_lock); spin_unlock_irq(&p->real_parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 0cf9cdb6e491..9387ae8ab048 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1068,6 +1068,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); + seqlock_init(&sig->stats_lock); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3e52836359ba..49b7cfe98f7a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -288,18 +288,28 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) struct signal_struct *sig = tsk->signal; cputime_t utime, stime; struct task_struct *t; - - times->utime = sig->utime; - times->stime = sig->stime; - times->sum_exec_runtime = sig->sum_sched_runtime; + unsigned int seq, nextseq; rcu_read_lock(); - for_each_thread(tsk, t) { - task_cputime(t, &utime, &stime); - times->utime += utime; - times->stime += stime; - times->sum_exec_runtime += task_sched_runtime(t); - } + /* Attempt a lockless read on the first round. */ + nextseq = 0; + do { + seq = nextseq; + read_seqbegin_or_lock(&sig->stats_lock, &seq); + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; + + for_each_thread(tsk, t) { + task_cputime(t, &utime, &stime); + times->utime += utime; + times->stime += stime; + times->sum_exec_runtime += task_sched_runtime(t); + } + /* If lockless access failed, take the lock. */ + nextseq = 1; + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry(&sig->stats_lock, seq); rcu_read_unlock(); } @@ -611,9 +621,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) cputime_adjust(&cputime, &p->prev_cputime, ut, st); } -/* - * Must be called with siglock held. 
- */ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime; diff --git a/kernel/sys.c b/kernel/sys.c index ce8129192a26..b6636643cbd1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -862,11 +862,9 @@ void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; - spin_lock_irq(&current->sighand->siglock); thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; - spin_unlock_irq(&current->sighand->siglock); tms->tms_utime = cputime_to_clock_t(tgutime); tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 3b8946416a5f..492b986195d5 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -272,22 +272,8 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, if (same_thread_group(tsk, current)) err = cpu_clock_sample(which_clock, tsk, &rtn); } else { - unsigned long flags; - struct sighand_struct *sighand; - - /* - * while_each_thread() is not yet entirely RCU safe, - * keep locking the group while sampling process - * clock for now. - */ - sighand = lock_task_sighand(tsk, &flags); - if (!sighand) - return err; - if (tsk == current || thread_group_leader(tsk)) err = cpu_clock_sample_group(which_clock, tsk, &rtn); - - unlock_task_sighand(tsk, &flags); } if (!err) -- cgit v1.2.3 From 0b750b3baa2d64f1b77aecc10f20deeb28efe60d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 5 Sep 2014 18:08:47 +0200 Subject: HID: usbhid: add always-poll quirk Add a quirk to make sure that a device is always polled for input events even if it hasn't been opened. This is needed for devices that disconnect from the bus unless the interrupt endpoint has been polled at least once, or when an input event goes unanswered (e.g. after having shut down X).
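For context, a minimal sketch (not taken from this patch; the demo_* probe function is hypothetical) of how a HID driver for such a device could opt in: setting the quirk bit before hid_hw_start() means usbhid_start() will submit the interrupt URB and keep polling even while the device node is closed.

#include <linux/hid.h>

static int demo_hid_probe(struct hid_device *hdev,
			  const struct hid_device_id *id)
{
	int ret;

	/* Keep the interrupt endpoint polled even with no open handle. */
	hdev->quirks |= HID_QUIRK_ALWAYS_POLL;

	ret = hid_parse(hdev);
	if (ret)
		return ret;

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}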
Signed-off-by: Johan Hovold Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 26 +++++++++++++++++++++++--- include/linux/hid.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 79cf503e37bf..ddd547ad6d7e 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -82,7 +82,7 @@ static int hid_start_in(struct hid_device *hid) struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); - if (hid->open > 0 && + if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { @@ -292,6 +292,8 @@ static void hid_irq_in(struct urb *urb) case 0: /* success */ usbhid_mark_busy(usbhid); usbhid->retry_delay = 0; + if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open) + break; hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); @@ -735,8 +737,10 @@ void usbhid_close(struct hid_device *hid) if (!--hid->open) { spin_unlock_irq(&usbhid->lock); hid_cancel_delayed_stuff(usbhid); - usb_kill_urb(usbhid->urbin); - usbhid->intf->needs_remote_wakeup = 0; + if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { + usb_kill_urb(usbhid->urbin); + usbhid->intf->needs_remote_wakeup = 0; + } } else { spin_unlock_irq(&usbhid->lock); } @@ -1134,6 +1138,19 @@ static int usbhid_start(struct hid_device *hid) set_bit(HID_STARTED, &usbhid->iofl); + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { + ret = usb_autopm_get_interface(usbhid->intf); + if (ret) + goto fail; + usbhid->intf->needs_remote_wakeup = 1; + ret = hid_start_in(hid); + if (ret) { + dev_err(&hid->dev, + "failed to start in urb: %d\n", ret); + } + usb_autopm_put_interface(usbhid->intf); + } + /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. @@ -1166,6 +1183,9 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) + usbhid->intf->needs_remote_wakeup = 0; + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); diff --git a/include/linux/hid.h b/include/linux/hid.h index f53c4a9cca1d..26ee25fced27 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -287,6 +287,7 @@ struct hid_item { #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 #define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +#define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP 0x00040000 -- cgit v1.2.3 From c8d6591752e96c550cb98b781326d72d8eedcc79 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 3 Sep 2014 06:48:37 -0700 Subject: mac80211: support DTPC IE (from Cisco Client eXtensions) Linux already supports 802.11h, where the access point can tell the client to reduce its transmission power. However, 802.11h is only defined for 5 GHz, where the need for this is much smaller than on 2.4 GHz. Cisco has their own solution, called DTPC (Dynamic Transmit Power Control). Cisco APs on a controller sometimes but not always send 802.11h; they always send DTPC, even on 2.4 GHz. 
This patch adds support for parsing and honoring the DTPC IE in addition to the 802.11h element (they do not always contain the same limits, so both must be honored); the format is not documented, but very simple. Tested (on top of wireless.git and on 3.16.1) against a Cisco Aironet 1142 joined to a Cisco 2504 WLC, by setting various transmit power levels for the given access points and observing the results. The Wireshark 802.11 dissector agrees with the interpretation of the element, except for negative numbers, which seem to never happen anyway. Signed-off-by: Steinar H. Gunderson Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 60 +++++++++++++++++++++++++++++++++++++--------- net/mac80211/util.c | 25 +++++++++++++++++++ 4 files changed, 77 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 77d4f26e0235..61a09f0644aa 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1824,7 +1824,8 @@ enum ieee80211_eid { WLAN_EID_DMG_TSPEC = 146, WLAN_EID_DMG_AT = 147, WLAN_EID_DMG_CAP = 148, - /* 149-150 reserved for Cisco */ + /* 149 reserved for Cisco */ + WLAN_EID_CISCO_VENDOR_SPECIFIC = 150, WLAN_EID_DMG_OPERATION = 151, WLAN_EID_DMG_BSS_PARAM_CHANGE = 152, WLAN_EID_DMG_BEAM_REFINEMENT = 153, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6b7bdd8fae29..c2aaec4dfcf0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1369,6 +1369,7 @@ struct ieee802_11_elems { const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const u8 *country_elem; const u8 *pwr_constr_elem; + const u8 *cisco_dtpc_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0b812a88f95f..a7b92f5f7161 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1230,14 +1230,30 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, return have_chan_pwr; } +static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + const u8 *cisco_dtpc_ie, + int *pwr_level) +{ + /* From practical testing, the first data byte of the DTPC element + * seems to contain the requested dBm level, and the CLI on Cisco + * APs clearly state the range is -127 to 127 dBm, which indicates + * a signed byte, although it seemingly never actually goes negative. + * The other byte seems to always be zero. 
+ */ + *pwr_level = (__s8)cisco_dtpc_ie[4]; +} + static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, struct ieee80211_mgmt *mgmt, const u8 *country_ie, u8 country_ie_len, - const u8 *pwr_constr_ie) + const u8 *pwr_constr_ie, + const u8 *cisco_dtpc_ie) { - bool has_80211h_pwr = false; - int chan_pwr, pwr_reduction_80211h; + bool has_80211h_pwr = false, has_cisco_pwr = false; + int chan_pwr = 0, pwr_reduction_80211h = 0; + int pwr_level_cisco, pwr_level_80211h; int new_ap_level; if (country_ie && pwr_constr_ie && @@ -1246,16 +1262,35 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); - new_ap_level = max_t(int, 0, chan_pwr - pwr_reduction_80211h); + pwr_level_80211h = + max_t(int, 0, chan_pwr - pwr_reduction_80211h); + } + + if (cisco_dtpc_ie) { + ieee80211_find_cisco_dtpc( + sdata, channel, cisco_dtpc_ie, &pwr_level_cisco); + has_cisco_pwr = true; } - if (!has_80211h_pwr) + if (!has_80211h_pwr && !has_cisco_pwr) return 0; - sdata_info(sdata, - "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", - new_ap_level, chan_pwr, pwr_reduction_80211h, - sdata->u.mgd.bssid); + /* If we have both 802.11h and Cisco DTPC, apply both limits + * by picking the smallest of the two power levels advertised. + */ + if (has_80211h_pwr && + (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { + sdata_info(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + pwr_level_80211h, chan_pwr, pwr_reduction_80211h, + sdata->u.mgd.bssid); + new_ap_level = pwr_level_80211h; + } else { /* has_cisco_pwr is always true here. */ + sdata_info(sdata, + "Limiting TX power to %d dBm as advertised by %pM\n", + pwr_level_cisco, sdata->u.mgd.bssid); + new_ap_level = pwr_level_cisco; + } if (sdata->ap_power_level == new_ap_level) return 0; @@ -2923,7 +2958,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* * This is the canonical list of information elements we care about, * the filter code also gives us all changes to the Microsoft OUI - * (00:50:F2) vendor IE which is used for WMM which we need to track. + * (00:50:F2) vendor IE which is used for WMM which we need to track, + * as well as the DTPC IE (part of the Cisco OUI) used for signaling + * changes to requested client power. * * We implement beacon filtering in software since that means we can * avoid processing the frame here and in cfg80211, and userspace @@ -3232,7 +3269,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, elems.country_elem, elems.country_elem_len, - elems.pwr_constr_elem); + elems.pwr_constr_elem, + elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b444f27a788e..3c61060a4d2b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1015,6 +1015,31 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } elems->pwr_constr_elem = pos; break; + case WLAN_EID_CISCO_VENDOR_SPECIFIC: + /* Lots of different options exist, but we only care + * about the Dynamic Transmit Power Control element. + * First check for the Cisco OUI, then for the DTPC + * tag (0x00). 
+ */ + if (elen < 4) { + elem_parse_failed = true; + break; + } + + if (pos[0] != 0x00 || pos[1] != 0x40 || + pos[2] != 0x96 || pos[3] != 0x00) + break; + + if (elen != 6) { + elem_parse_failed = true; + break; + } + + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + + elems->cisco_dtpc_elem = pos; + break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; -- cgit v1.2.3 From 4930d247af29f849cd1bddd65be2400684dc886e Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:35 +0200 Subject: ARM: at91/tclib: move initialization from alloc to probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move resource retrieval from atmel_tc_alloc to tc_probe to avoid reporting resource-related issues late, i.e. only when a TC block user requests a TC block. Moreover, resource retrieval is usually done in the probe function, so moving it adds some consistency with other drivers. Initialization is done once, i.e. not every time a TC block is requested. If it fails, the device is not appended to the list of TC blocks. Furthermore, the device id is retrieved at probe as well, avoiding parsing the DT every time the user requests a TC block. Signed-off-by: Gaël PORTAY Acked-by: Thierry Reding Acked-by: Boris Brezillon Signed-off-by: Nicolas Ferre --- drivers/clocksource/tcb_clksrc.c | 2 +- drivers/misc/atmel_tclib.c | 71 +++++++++++++--------------------------- drivers/pwm/pwm-atmel-tcb.c | 2 +- include/linux/atmel_tc.h | 8 +++-- 4 files changed, 29 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index a8d7ea14f183..f922e81d531b 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -279,7 +279,7 @@ static int __init tcb_clksrc_init(void) int i; int ret; - tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK, clksrc.name); + tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK); if (!tc) { pr_debug("can't alloc TC for clocksource\n"); return -ENODEV; diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index b514a2d4485b..d505d1e0857b 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -35,60 +35,31 @@ static LIST_HEAD(tc_list); /** * atmel_tc_alloc - allocate a specified TC block * @block: which block to allocate - * @name: name to be associated with the iomem resource * * Caller allocates a block. If it is available, a pointer to a * pre-initialized struct atmel_tc is returned. The caller can access * the registers directly through the "regs" field.
*/ -struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name) +struct atmel_tc *atmel_tc_alloc(unsigned block) { struct atmel_tc *tc; struct platform_device *pdev = NULL; - struct resource *r; - size_t size; spin_lock(&tc_list_lock); list_for_each_entry(tc, &tc_list, node) { - if (tc->pdev->dev.of_node) { - if (of_alias_get_id(tc->pdev->dev.of_node, "tcb") - == block) { - pdev = tc->pdev; - break; - } - } else if (tc->pdev->id == block) { + if (tc->allocated) + continue; + + if ((tc->pdev->dev.of_node && tc->id == block) || + (tc->pdev->id == block)) { pdev = tc->pdev; + tc->allocated = true; break; } } - - if (!pdev || tc->iomem) - goto fail; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) - goto fail; - - size = resource_size(r); - r = request_mem_region(r->start, size, name); - if (!r) - goto fail; - - tc->regs = ioremap(r->start, size); - if (!tc->regs) - goto fail_ioremap; - - tc->iomem = r; - -out: spin_unlock(&tc_list_lock); - return tc; -fail_ioremap: - release_mem_region(r->start, size); -fail: - tc = NULL; - goto out; + return pdev ? tc : NULL; } EXPORT_SYMBOL_GPL(atmel_tc_alloc); @@ -96,19 +67,14 @@ EXPORT_SYMBOL_GPL(atmel_tc_alloc); * atmel_tc_free - release a specified TC block * @tc: Timer/counter block that was returned by atmel_tc_alloc() * - * This reverses the effect of atmel_tc_alloc(), unmapping the I/O - * registers, invalidating the resource returned by that routine and - * making the TC available to other drivers. + * This reverses the effect of atmel_tc_alloc(), invalidating the resource + * returned by that routine and making the TC available to other drivers. */ void atmel_tc_free(struct atmel_tc *tc) { spin_lock(&tc_list_lock); - if (tc->regs) { - iounmap(tc->regs); - release_mem_region(tc->iomem->start, resource_size(tc->iomem)); - tc->regs = NULL; - tc->iomem = NULL; - } + if (tc->allocated) + tc->allocated = false; spin_unlock(&tc_list_lock); } EXPORT_SYMBOL_GPL(atmel_tc_free); @@ -142,9 +108,7 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - - if (!platform_get_resource(pdev, IORESOURCE_MEM, 0)) - return -EINVAL; + struct resource *r; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -160,12 +124,21 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tc->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(tc->regs)) + return PTR_ERR(tc->regs); + /* Now take SoC information if available */ if (pdev->dev.of_node) { const struct of_device_id *match; match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node); if (match) tc->tcb_config = match->data; + + tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb"); + } else { + tc->id = pdev->id; } tc->clk[0] = clk; diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index f3dcd02390f1..d56e5b717431 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -379,7 +379,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev) return err; } - tc = atmel_tc_alloc(tcblock, "tcb-pwm"); + tc = atmel_tc_alloc(tcblock); if (tc == NULL) { dev_err(&pdev->dev, "failed to allocate Timer Counter Block\n"); return -ENOMEM; diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index 89a931babecf..d8aa88461c64 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -44,12 +44,13 @@ struct atmel_tcb_config { /** * struct atmel_tc - information about a Timer/Counter Block * 
@pdev: physical device - * @iomem: resource associated with the I/O register * @regs: mapping through which the I/O registers can be accessed + * @id: block id * @tcb_config: configuration data from SoC * @irq: irq for each of the three channels * @clk: internal clock source for each of the three channels * @node: list node, for tclib internal use + * @allocated: if already used, for tclib internal use * * On some platforms, each TC channel has its own clocks and IRQs, * while on others, all TC channels share the same clock and IRQ. @@ -61,15 +62,16 @@ struct atmel_tcb_config { */ struct atmel_tc { struct platform_device *pdev; - struct resource *iomem; void __iomem *regs; + int id; const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; struct list_head node; + bool allocated; }; -extern struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name); +extern struct atmel_tc *atmel_tc_alloc(unsigned block); extern void atmel_tc_free(struct atmel_tc *tc); /* platform-specific ATMEL_TC_TIMER_CLOCKx divisors (0 means 32KiHz) */ -- cgit v1.2.3 From 84f462371cc07272a17e2ae96c3540f795db273a Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:36 +0200 Subject: ARM: at91/tclib: mask interruptions at shutdown and probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly shut down the timer counter block by masking its interrupts. Otherwise, a crash may happen when kexec-ing a new kernel (see the backtrace below): an interrupt may fire before the handler is set, leading to a kernel NULL pointer dereference. Furthermore, make sure the interrupts are masked when the driver is initialized. This prevents a freshly kexec-ed kernel from crashing when launched from a kernel which did not properly mask interrupts at shutdown. The backtrace below happened after kexec-ing a new kernel from a kernel that did not shut down properly, leaving interrupts unmasked.
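For reference, the core of the fix, applied at both probe and shutdown time in the diff below, is simply to write the "all interrupts" mask to each channel's Interrupt Disable Register. A minimal sketch, assuming a struct atmel_tc *tc as used in the driver, the existing ATMEL_TC_REG()/IDR accessors, and the ATMEL_TC_ALL_IRQ mask introduced by this patch:

        int i;

        /* mask every interrupt source on all three TC channels */
        for (i = 0; i < 3; i++)
                writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));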
Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0+ #144 task: c1828aa0 ti: c182a000 task.ti: c182a000 PC is at 0x0 LR is at ch2_irq+0x28/0x30 pc : [<00000000>] lr : [] psr: 000000d3 sp : c182bd38 ip : c182bd48 fp : c182bd44 r10: c0373390 r9 : c1825b00 r8 : 60000053 r7 : 00000000 r6 : 00000000 r5 : 00000013 r4 : c036e800 r3 : 00000000 r2 : 00002004 r1 : c036e760 r0 : c036e760 Flags: nzcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: 20004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc182a1c0) Stack: (0xc182bd38 to 0xc182c000) bd20: c182bd7c c182bd48 bd40: c0045430 c01db8ec 00000000 c18c6f40 c182bd74 c1825b00 c035cec4 00000000 bd60: c182be2c 60000053 c1825b34 00000000 c182bd94 c182bd80 c0045570 c0045408 bd80: 00000000 c1825b00 c182bdac c182bd98 c0047f34 c0045550 00000013 c036619c bda0: c182bdc4 c182bdb0 c0044da4 c0047e98 0000007f 00000013 c182bde4 c182bdc8 bdc0: c0009e34 c0044d8c fefff000 c0046728 60000053 ffffffff c182bdf4 c182bde8 bde0: c00086a8 c0009ddc c182be74 c182bdf8 c000cb80 c0008674 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff be40: 00000013 c036e800 c182be64 c1825b00 00000013 c036e800 c035ed98 c03874bc be60: 00000004 c036e700 c182be94 c182be78 c004689c c0046398 c036e760 c18c6080 be80: 00000000 c035ed10 c182bedc c182be98 c0348b08 c004684c 0000000c c034dac8 bea0: 004c4b3f c028c338 c036e760 00000013 c014ecc8 c18e67e0 c035b9c0 c0348884 bec0: c035b9c0 c182a020 00000000 00000000 c182bf54 c182bee0 c00089fc c0348894 bee0: c00da51c c1ffcc78 c182bf0c c182bef8 c002d100 c002d09c c1ffcc78 00000000 bf00: c182bf54 c182bf10 c002d308 c0336570 c182bf3c c0334e44 00000003 00000003 bf20: 00000030 c0334b44 c0044d74 00000003 00000003 c034dac8 c0350a94 c0373440 bf40: c0373440 00000030 c182bf94 c182bf58 c0336d24 c000890c 00000003 00000003 bf60: c0336560 c182bf64 c182bf64 6e616e0d 00000000 c0272fc8 00000000 00000000 bf80: 00000000 00000000 c182bfac c182bf98 c0272fd8 c0336bd8 c182a000 00000000 bfa0: 00000000 c182bfb0 c00095d0 c0272fd8 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 374d27cd 33cc33e4 Backtrace: [] (ch2_irq) from [] (handle_irq_event_percpu+0x38/0x148) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x30/0x40) r10:00000000 r9:c1825b34 r8:60000053 r7:c182be2c r6:00000000 r5:c035cec4 r4:c1825b00 [] (handle_irq_event) from [] (handle_fasteoi_irq+0xac/0x11c) r4:c1825b00 r3:00000000 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r5:c036619c r4:00000013 [] (generic_handle_irq) from [] (handle_IRQ+0x68/0x88) r4:00000013 r3:0000007f [] (handle_IRQ) from [] (at91_aic_handle_irq+0x44/0x4c) r6:ffffffff r5:60000053 r4:c0046728 r3:fefff000 [] (at91_aic_handle_irq) from [] (__irq_svc+0x40/0x4c) Exception stack(0xc182bdf8 to 0xc182be40) bde0: 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff [] (__setup_irq) from [] (setup_irq+0x60/0x8c) r10:c036e700 r9:00000004 r8:c03874bc r7:c035ed98 r6:c036e800 r5:00000013 r4:c1825b00 [] (setup_irq) from [] (tcb_clksrc_init+0x284/0x31c) r6:c035ed10 r5:00000000 
r4:c18c6080 r3:c036e760 [] (tcb_clksrc_init) from [] (do_one_initcall+0x100/0x1b4) r10:00000000 r9:00000000 r8:c182a020 r7:c035b9c0 r6:c0348884 r5:c035b9c0 r4:c18e67e0 [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x224) r9:00000030 r8:c0373440 r7:c0373440 r6:c0350a94 r5:c034dac8 r4:00000003 [] (kernel_init_freeable) from [] (kernel_init+0x10/0xec) r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0272fc8 r4:00000000 [] (kernel_init) from [] (ret_from_fork+0x14/0x24) r4:00000000 r3:c182a000 Code: bad PC value ---[ end trace 5b30f0017e282e47 ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Gaël PORTAY Acked-by: Boris Brezillon Acked-by: Daniel Lezcano Signed-off-by: Nicolas Ferre --- drivers/misc/atmel_tclib.c | 16 ++++++++++++++++ include/linux/atmel_tc.h | 5 +++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index d505d1e0857b..0ca05c3ec8d6 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,6 +109,7 @@ static int __init tc_probe(struct platform_device *pdev) struct clk *clk; int irq; struct resource *r; + unsigned int i; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -157,18 +158,33 @@ static int __init tc_probe(struct platform_device *pdev) if (tc->irq[2] < 0) tc->irq[2] = irq; + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); + spin_lock(&tc_list_lock); list_add_tail(&tc->node, &tc_list); spin_unlock(&tc_list_lock); + platform_set_drvdata(pdev, tc); + return 0; } +static void tc_shutdown(struct platform_device *pdev) +{ + int i; + struct atmel_tc *tc = platform_get_drvdata(pdev); + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); +} + static struct platform_driver tc_driver = { .driver = { .name = "atmel_tcb", .of_match_table = of_match_ptr(atmel_tcb_dt_ids), }, + .shutdown = tc_shutdown, }; static int __init tc_init(void) diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index d8aa88461c64..b87c1c7c242a 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -260,5 +260,10 @@ extern const u8 atmel_tc_divisors[5]; #define ATMEL_TC_LDRAS (1 << 5) /* RA loading */ #define ATMEL_TC_LDRBS (1 << 6) /* RB loading */ #define ATMEL_TC_ETRGS (1 << 7) /* external trigger */ +#define ATMEL_TC_ALL_IRQ (ATMEL_TC_COVFS | ATMEL_TC_LOVRS | \ + ATMEL_TC_CPAS | ATMEL_TC_CPBS | \ + ATMEL_TC_CPCS | ATMEL_TC_LDRAS | \ + ATMEL_TC_LDRBS | ATMEL_TC_ETRGS) \ + /* all IRQs */ #endif -- cgit v1.2.3 From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:56 +0900 Subject: block, bdi: an active gendisk always has a request_queue associated with it bdev_get_queue() returns the request_queue associated with the specified block_device. blk_get_backing_dev_info() makes use of bdev_get_queue() to determine the associated bdi given a block_device. All the callers of bdev_get_queue() including blk_get_backing_dev_info() assume that bdev_get_queue() may return NULL and implement NULL handling; however, bdev_get_queue() requires the passed in block_device is opened and attached to its gendisk. Because an active gendisk always has a valid request_queue associated with it, bdev_get_queue() can never return NULL and neither can blk_get_backing_dev_info(). Make it clear that neither of the two functions can return NULL and remove NULL handling from all the callers. 
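With this guarantee in place, a caller that holds an open block device can use the result unconditionally; a sketch of the resulting calling convention (the read-ahead assignment mirrors what blk-core already does and is only an example):

        struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

        /* bdev is open, so bdi is guaranteed to be non-NULL here */
        bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;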
Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Dave Chinner Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++------- block/compat_ioctl.c | 4 ---- block/ioctl.c | 4 ---- fs/block_dev.c | 2 -- fs/btrfs/disk-io.c | 2 +- fs/xfs/xfs_buf.c | 2 -- include/linux/blkdev.h | 2 +- 7 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 93603e6ff479..817446175489 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info - * - * Will return NULL if the request queue cannot be located. + * backing_dev_info. This function can only be called if @bdev is opened + * and the return value is never NULL. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct backing_dev_info *ret = NULL; struct request_queue *q = bdev_get_queue(bdev); - if (q) - ret = &q->backing_dev_info; - return ret; + return &q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 18b282ce361e..f678c733df40 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return compat_put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: /* compatible */ @@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: diff --git a/block/ioctl.c b/block/ioctl.c index d6cda8147c91..6c7bf903742f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); @@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d7274619bf9..d3251eca6429 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1173,8 +1173,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7d..39ff591ae1b4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1694,7 +1694,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi && bdi_congested(bdi, bdi_bits)) { + if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b064..497fcde381d7 100644 --- 
a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1678,8 +1678,6 @@ xfs_alloc_buftarg( btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 518b46555b80..e267bf0db559 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* -- cgit v1.2.3 From e36f1dfce0b45d347927568efe1088821758cc3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:57 +0900 Subject: bdi: remove unused stuff Two flags and one bdi_writeback field are no longer used. Remove them. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e488e9459a93..2103a7fa3fd8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,12 +28,10 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ BDI_writeback_running, /* Writeback is in progress */ - BDI_unused, /* Available bits start here */ }; typedef int (congested_fn)(void *, int); @@ -50,7 +48,6 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ - unsigned int nr; unsigned long last_old_flush; /* last old data flush */ -- cgit v1.2.3 From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:01 +0900 Subject: bdi: reimplement bdev_inode_switch_bdi() A block_device may be attached to different gendisks and thus different bdis over time. bdev_inode_switch_bdi() is used to switch the associated bdi. The function assumes that the inode could be dirty and transfers it between bdis if so. This is a bit nasty in that it reaches into bdi internals. This patch reimplements the function so that it writes out the inode if dirty. This is a lot simpler and can be implemented without exposing bdi internals. Signed-off-by: Tejun Heo Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 32 +++++++++++--------------------- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 2 +- 3 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index d3251eca6429..cc8d68ac29aa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode) EXPORT_SYMBOL(I_BDEV); /* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. + * Move the inode from its current bdi to a new bdi. Make sure the inode + * is clean before moving so that it doesn't linger on the old bdi. 
*/ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; - - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); - spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (true) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_DIRTY)) { + inode->i_data.backing_dev_info = dst; + spin_unlock(&inode->i_lock); + return; + } + spin_unlock(&inode->i_lock); + WARN_ON_ONCE(write_inode_now(inode, true)); } - spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2103a7fa3fd8..5da6012b7a14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b65fe93ad612..7d63d5e9d3de 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -40,7 +40,7 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { spin_lock(&wb1->list_lock); -- cgit v1.2.3 From e676253b19b2d269cccf67fdb1592120a0cd0676 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 5 Aug 2014 11:45:59 +0200 Subject: serial/8250: Add support for RS485 IOCTLs This patch allows the users of the 8250 infrastructure to define a handler for RS485 configuration. If no handler is defined, the 8250 driver will work as usual.
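A driver built on the 8250 core would hook into this roughly as follows. This is a hypothetical sketch: foo_rs485_config and the register programming are placeholders; only the rs485_config field, struct serial_rs485 and serial8250_register_8250_port() are real interfaces here:

        static int foo_rs485_config(struct uart_8250_port *up,
                                    struct serial_rs485 *rs485)
        {
                /* program the hardware's RS485 direction control from *rs485 */
                return 0;
        }

        struct uart_8250_port uart = { /* port setup elided */ };

        uart.rs485_config = foo_rs485_config;
        serial8250_register_8250_port(&uart);

Userspace can then issue the standard TIOCSRS485/TIOCGRS485 ioctls against the port, and the core stores the accepted configuration in up->rs485.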
Signed-off-by: Ricardo Ribalda Delgado Acked-by: Alan Cox -- v2:Change suggested by Alan "One Thousand Gnomes": - Move rs485 structure further down on the uart_8250_port structure drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..b28ed1beb50f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -2843,6 +2843,42 @@ serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) return 0; } +static int serial8250_ioctl(struct uart_port *port, unsigned int cmd, + unsigned long arg) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int ret; + struct serial_rs485 rs485_config; + + if (!up->rs485_config) + return -ENOIOCTLCMD; + + switch (cmd) { + case TIOCSRS485: + if (copy_from_user(&rs485_config, (void __user *)arg, + sizeof(rs485_config))) + return -EFAULT; + + ret = up->rs485_config(up, &rs485_config); + if (ret) + return ret; + + memcpy(&up->rs485, &rs485_config, sizeof(rs485_config)); + + return 0; + case TIOCGRS485: + if (copy_to_user((void __user *)arg, &up->rs485, + sizeof(up->rs485))) + return -EFAULT; + return 0; + default: + break; + } + + return -ENOIOCTLCMD; +} + static const char * serial8250_type(struct uart_port *port) { @@ -2872,6 +2908,7 @@ static struct uart_ops serial8250_pops = { .request_port = serial8250_request_port, .config_port = serial8250_config_port, .verify_port = serial8250_verify_port, + .ioctl = serial8250_ioctl, #ifdef CONFIG_CONSOLE_POLL .poll_get_char = serial8250_get_poll_char, .poll_put_char = serial8250_put_poll_char, @@ -3388,6 +3425,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart->port.fifosize = up->port.fifosize; uart->tx_loadsz = up->tx_loadsz; uart->capabilities = up->capabilities; + uart->rs485_config = up->rs485_config; + uart->rs485 = up->rs485; /* Take tx_loadsz from fifosize if it wasn't set separately */ if (uart->port.fifosize && !uart->tx_loadsz) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index f93649e22c43..6dd671765312 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -96,10 +96,13 @@ struct uart_8250_port { unsigned char msr_saved_flags; struct uart_8250_dma *dma; + struct serial_rs485 rs485; /* 8250 specific callbacks */ int (*dl_read)(struct uart_8250_port *); void (*dl_write)(struct uart_8250_port *, int); + int (*rs485_config)(struct uart_8250_port *, + struct serial_rs485 *rs485); }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) -- cgit v1.2.3 From 55e15c949fd05d247a889df0ed0177a676fec665 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:17 +0200 Subject: PM / domains: Remove the pm_genpd_add|remove_callbacks APIs There are no users of these APIs. To simplify the generic power domain let's remove them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 106 -------------------------------------------- include/linux/pm_domain.h | 19 -------- 2 files changed, 125 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index eee55c1e5fde..e613e3cb96d8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1743,112 +1743,6 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return ret; } -/** - * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. - * @dev: Device to add the callbacks to. - * @ops: Set of callbacks to add. - * @td: Timing data to add to the device along with the callbacks (optional). - * - * Every call to this routine should be balanced with a call to - * __pm_genpd_remove_callbacks() and they must not be nested. - */ -int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL; - int ret = 0; - - if (!(dev && ops)) - return -EINVAL; - - gpd_data_new = __pm_genpd_alloc_dev_data(dev); - if (!gpd_data_new) - return -ENOMEM; - - pm_runtime_disable(dev); - device_pm_lock(); - - ret = dev_pm_get_subsys_data(dev); - if (ret) - goto out; - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - } else { - gpd_data = gpd_data_new; - dev->power.subsys_data->domain_data = &gpd_data->base; - } - gpd_data->refcount++; - gpd_data->ops = *ops; - if (td) - gpd_data->td = *td; - - spin_unlock_irq(&dev->power.lock); - - out: - device_pm_unlock(); - pm_runtime_enable(dev); - - if (gpd_data != gpd_data_new) - __pm_genpd_free_dev_data(dev, gpd_data_new); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); - -/** - * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. - * @dev: Device to remove the callbacks from. - * @clear_td: If set, clear the device's timing data too. - * - * This routine can only be called after pm_genpd_add_callbacks(). - */ -int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - struct generic_pm_domain_data *gpd_data = NULL; - bool remove = false; - int ret = 0; - - if (!(dev && dev->power.subsys_data)) - return -EINVAL; - - pm_runtime_disable(dev); - device_pm_lock(); - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - gpd_data->ops = (struct gpd_dev_ops){ NULL }; - if (clear_td) - gpd_data->td = (struct gpd_timing_data){ 0 }; - - if (--gpd_data->refcount == 0) { - dev->power.subsys_data->domain_data = NULL; - remove = true; - } - } else { - ret = -EINVAL; - } - - spin_unlock_irq(&dev->power.lock); - - device_pm_unlock(); - pm_runtime_enable(dev); - - if (ret) - return ret; - - dev_pm_put_subsys_data(dev); - if (remove) - __pm_genpd_free_dev_data(dev, gpd_data); - - return 0; -} -EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); - /** * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle. * @genpd: PM domain to be connected with cpuidle. 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ebc4c76ffb73..9ae2b9e88d61 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,10 +151,6 @@ extern int pm_genpd_add_subdomain_names(const char *master_name, const char *subdomain_name); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td); -extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); extern int pm_genpd_name_attach_cpuidle(const char *name, int state); extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); @@ -217,16 +213,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - return -ENOSYS; -} -static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - return -ENOSYS; -} static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st) { return -ENOSYS; @@ -281,11 +267,6 @@ static inline int pm_genpd_name_add_device(const char *domain_name, return __pm_genpd_name_add_device(domain_name, dev, NULL); } -static inline int pm_genpd_remove_callbacks(struct device *dev) -{ - return __pm_genpd_remove_callbacks(dev, true); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v1.2.3 From 67da6d4bf43c4208433ef8f3ee487401b4dc9c74 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:18 +0200 Subject: PM / domains: Ignore callbacks for subsys generic_pm_domain_data In a step of simplifying the generic power domain let's move away from using these callbacks. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 131 +++----------------------------------------- include/linux/pm_domain.h | 1 - 2 files changed, 8 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e613e3cb96d8..aa5b14c1e643 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -25,10 +25,6 @@ __routine = genpd->dev_ops.callback; \ if (__routine) { \ __ret = __routine(dev); \ - } else { \ - __routine = dev_gpd_data(dev)->ops.callback; \ - if (__routine) \ - __ret = __routine(dev); \ } \ __ret; \ }) @@ -1871,10 +1867,6 @@ static int pm_genpd_default_save_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.save_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_suspend; else if (dev->class && dev->class->pm) @@ -1898,10 +1890,6 @@ static int pm_genpd_default_restore_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.restore_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_resume; else if (dev->class && dev->class->pm) @@ -1917,109 +1905,6 @@ static int pm_genpd_default_restore_state(struct device *dev) return cb ? cb(dev) : 0; } -#ifdef CONFIG_PM_SLEEP - -/** - * pm_genpd_default_suspend - Default "device suspend" for PM domians. - * @dev: Device to handle. 
- */ -static int pm_genpd_default_suspend(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend; - - return cb ? cb(dev) : pm_generic_suspend(dev); -} - -/** - * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_suspend_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late; - - return cb ? cb(dev) : pm_generic_suspend_late(dev); -} - -/** - * pm_genpd_default_resume_early - Default "early device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early; - - return cb ? cb(dev) : pm_generic_resume_early(dev); -} - -/** - * pm_genpd_default_resume - Default "device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume; - - return cb ? cb(dev) : pm_generic_resume(dev); -} - -/** - * pm_genpd_default_freeze - Default "device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; - - return cb ? cb(dev) : pm_generic_freeze(dev); -} - -/** - * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; - - return cb ? cb(dev) : pm_generic_freeze_late(dev); -} - -/** - * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; - - return cb ? cb(dev) : pm_generic_thaw_early(dev); -} - -/** - * pm_genpd_default_thaw - Default "device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; - - return cb ? cb(dev) : pm_generic_thaw(dev); -} - -#else /* !CONFIG_PM_SLEEP */ - -#define pm_genpd_default_suspend NULL -#define pm_genpd_default_suspend_late NULL -#define pm_genpd_default_resume_early NULL -#define pm_genpd_default_resume NULL -#define pm_genpd_default_freeze NULL -#define pm_genpd_default_freeze_late NULL -#define pm_genpd_default_thaw_early NULL -#define pm_genpd_default_thaw NULL - -#endif /* !CONFIG_PM_SLEEP */ - /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. 
@@ -2071,14 +1956,14 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_genpd_default_suspend; - genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late; - genpd->dev_ops.resume_early = pm_genpd_default_resume_early; - genpd->dev_ops.resume = pm_genpd_default_resume; - genpd->dev_ops.freeze = pm_genpd_default_freeze; - genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; - genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; - genpd->dev_ops.thaw = pm_genpd_default_thaw; + genpd->dev_ops.suspend = pm_generic_suspend; + genpd->dev_ops.suspend_late = pm_generic_suspend_late; + genpd->dev_ops.resume_early = pm_generic_resume_early; + genpd->dev_ops.resume = pm_generic_resume; + genpd->dev_ops.freeze = pm_generic_freeze; + genpd->dev_ops.freeze_late = pm_generic_freeze_late; + genpd->dev_ops.thaw_early = pm_generic_thaw_early; + genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9ae2b9e88d61..436ea149d40c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -108,7 +108,6 @@ struct gpd_timing_data { struct generic_pm_domain_data { struct pm_domain_data base; - struct gpd_dev_ops ops; struct gpd_timing_data td; struct notifier_block nb; struct mutex lock; -- cgit v1.2.3 From 1e0407ca54d28db8e5f02e437ff21cc6416c0be8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:19 +0200 Subject: PM / domains: Remove system PM callbacks from gpd_dev_ops There are no users of these callbacks; let's simplify the generic power domain by removing them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 64 ++++++--------------------------------------- include/linux/pm_domain.h | 8 ------ 2 files changed, 8 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index aa5b14c1e643..e777609ccc77 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -770,46 +770,6 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); } -static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend, dev); -} - -static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev); -} - -static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev); -} - -static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume, dev); -} - -static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze, dev); -} - -static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev); -} - -static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev); -} - -static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw, dev); -} - /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -991,7 +951,7 @@ static int pm_genpd_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); } /** @@ -1012,7 +972,7 @@ static int pm_genpd_suspend_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev); } /** @@ -1099,7 +1059,7 @@ static int pm_genpd_resume_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev); } /** @@ -1120,7 +1080,7 @@ static int pm_genpd_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); } /** @@ -1141,7 +1101,7 @@ static int pm_genpd_freeze(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); } /** @@ -1163,7 +1123,7 @@ static int pm_genpd_freeze_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev); } /** @@ -1227,7 +1187,7 @@ static int pm_genpd_thaw_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 
0 : genpd_thaw_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev); } /** @@ -1248,7 +1208,7 @@ static int pm_genpd_thaw(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); } /** @@ -1956,14 +1916,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_generic_suspend; - genpd->dev_ops.suspend_late = pm_generic_suspend_late; - genpd->dev_ops.resume_early = pm_generic_resume_early; - genpd->dev_ops.resume = pm_generic_resume; - genpd->dev_ops.freeze = pm_generic_freeze; - genpd->dev_ops.freeze_late = pm_generic_freeze_late; - genpd->dev_ops.thaw_early = pm_generic_thaw_early; - genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 436ea149d40c..b6343d4b9b04 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -35,14 +35,6 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); - int (*suspend)(struct device *dev); - int (*suspend_late)(struct device *dev); - int (*resume_early)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*freeze_late)(struct device *dev); - int (*thaw_early)(struct device *dev); - int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From c5d79ec2a5715489cff16a0d1cf4fa9108a5509e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:22 +0200 Subject: PM / domains: Remove dev_irq_safe from genpd config The genpd dev_irq_safe configuration somewhat overlaps with the runtime PM pm_runtime_irq_safe() option. Also, genpd currently doesn't have a good way to deal with these devices. So, until we figure out if and how to support this in genpd, let's remove the option to configure it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ---- include/linux/pm_domain.h | 1 - 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e777609ccc77..4298a2bcd228 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -615,8 +615,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; if (stop_ok && !stop_ok(dev)) return -EBUSY; @@ -661,8 +659,6 @@ static int pm_genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - /* If power.irq_safe, the PM domain is never powered off.
*/ if (dev->power.irq_safe) return genpd_start_dev_no_timing(genpd, dev); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b6343d4b9b04..360c94ceeebb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -63,7 +63,6 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ - bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); -- cgit v1.2.3 From d47e6464ae6c96735d4706f5cb0537fe717b6b00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:24 +0200 Subject: PM / domains: Remove pm_genpd_syscore_switch() API The pm_genpd_syscore_poweroff() API and pm_genpd_syscore_poweron() API make the pm_genpd_syscore_switch() API redundant. Moreover, since there are no active users, let's just remove it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 ++++++++++++++--- include/linux/pm_domain.h | 16 ++++------------ 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b910d0f6ff60..601e35b2fa71 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1292,13 +1292,13 @@ static void pm_genpd_complete(struct device *dev) } /** - * pm_genpd_syscore_switch - Switch power during system core suspend or resume. + * genpd_syscore_switch - Switch power during system core suspend or resume. * @dev: Device that normally is marked as "always on" to switch power for. * * This routine may only be called during the system core (syscore) suspend or * resume phase for devices whose "always on" flags are set.
*/ -void pm_genpd_syscore_switch(struct device *dev, bool suspend) +static void genpd_syscore_switch(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; @@ -1314,7 +1314,18 @@ void pm_genpd_syscore_switch(struct device *dev, bool suspend) genpd->suspended_count--; } } -EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch); + +void pm_genpd_syscore_poweroff(struct device *dev) +{ + genpd_syscore_switch(dev, true); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweroff); + +void pm_genpd_syscore_poweron(struct device *dev) +{ + genpd_syscore_switch(dev, false); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #else diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 360c94ceeebb..3997af691600 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -266,19 +266,11 @@ static inline void pm_genpd_poweroff_unused(void) {} #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -extern void pm_genpd_syscore_switch(struct device *dev, bool suspend); +extern void pm_genpd_syscore_poweroff(struct device *dev); +extern void pm_genpd_syscore_poweron(struct device *dev); #else -static inline void pm_genpd_syscore_switch(struct device *dev, bool suspend) {} +static inline void pm_genpd_syscore_poweroff(struct device *dev) {} +static inline void pm_genpd_syscore_poweron(struct device *dev) {} #endif -static inline void pm_genpd_syscore_poweroff(struct device *dev) -{ - pm_genpd_syscore_switch(dev, true); -} - -static inline void pm_genpd_syscore_poweron(struct device *dev) -{ - pm_genpd_syscore_switch(dev, false); -} - #endif /* _LINUX_PM_DOMAIN_H */ -- cgit v1.2.3 From d971f0b0eaaf3f2086bf21bbd64f7ea7e2f28459 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:25 +0200 Subject: PM / domains: Remove genpd_queue_power_off_work() API There are no active users of this API. Let's remove it, and if future needs show up we could consider having a get/put API instead. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 5 ++++- include/linux/pm_domain.h | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 601e35b2fa71..d71d0458d41f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -422,7 +422,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) * Queue up the execution of pm_genpd_poweroff() unless it's already been done * before.
-void genpd_queue_power_off_work(struct generic_pm_domain *genpd) +static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) { queue_work(pm_wq, &genpd->power_off_work); } @@ -729,6 +729,9 @@ static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static inline void +genpd_queue_power_off_work(struct generic_pm_domain *genpd) {} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3997af691600..c19c90b839b8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -258,10 +258,8 @@ static inline int pm_genpd_name_add_device(const char *domain_name, } #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); #else -static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} static inline void pm_genpd_poweroff_unused(void) {} #endif -- cgit v1.2.3 From 0f574d4c3a7a325cbbef28ee738dedca9851e957 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:30 +0200 Subject: PM / domains: Remove default_stop_ok() API There is currently no need to export default_stop_ok() as an API; instead, let's keep it local to the PM domain governor. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 7 ++----- include/linux/pm_domain.h | 6 ------ 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index a089e3bcdfbc..d88a62e104d4 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -42,7 +42,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data) * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check.
*/ -bool default_stop_ok(struct device *dev) +static bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; unsigned long flags; @@ -229,10 +229,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) #else /* !CONFIG_PM_RUNTIME */ -bool default_stop_ok(struct device *dev) -{ - return false; -} +static inline bool default_stop_ok(struct device *dev) { return false; } #define default_power_down_ok NULL #define always_on_power_down_ok NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index c19c90b839b8..3b59f296e6ec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,8 +151,6 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); -extern bool default_stop_ok(struct device *dev); - extern struct dev_power_governor pm_domain_always_on_gov; #else @@ -231,10 +229,6 @@ static inline int pm_genpd_name_poweron(const char *domain_name) { return -ENOSYS; } -static inline bool default_stop_ok(struct device *dev) -{ - return false; -} #define simple_qos_governor NULL #define pm_domain_always_on_gov NULL #endif -- cgit v1.2.3 From ae3c511c2d72161b11e93866203b59a3a37dfac7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:31 +0200 Subject: PM / domains: Keep declaration of dev_power_governors together This is a pure code cleanup in the header file for the PM domain. No functional change. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3b59f296e6ec..aa03586c94a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -117,8 +117,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct dev_power_governor simple_qos_governor; - extern struct generic_pm_domain *dev_to_genpd(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, @@ -151,6 +149,7 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); +extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; #else -- cgit v1.2.3 From 8a949b07e4062cbd07e04e6a47249e69ca65b944 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 2 Sep 2014 17:39:19 -0400 Subject: serial: core: Document lock requirement for UPF_* flags updates The flags field of struct uart_port can only be safely modified if the port mutex is held; no other lock prevents concurrent changes from corrupting the field. 
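In practice, a safe update looks something like the sketch below. It assumes the uart_port has already been attached to its uart_state by the serial core, so the associated tty_port mutex (the "port mutex" this comment refers to) is reachable; UPF_SHARE_IRQ is just an arbitrary example flag:

        struct tty_port *tport = &uport->state->port;

        mutex_lock(&tport->mutex);
        uport->flags |= UPF_SHARE_IRQ;
        mutex_unlock(&tport->mutex);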
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cf3a1e789bf5..8cb267b1fcd5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -152,6 +152,7 @@ struct uart_port { unsigned long sysrq; /* sysrq timeout */ #endif + /* flags must be updated while holding port mutex */ upf_t flags; #define UPF_FOURPORT ((__force upf_t) (1 << 1)) -- cgit v1.2.3 From b99b121b2aa42e60e5b73fdd3a49863337839c7b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Sep 2014 21:02:37 +0200 Subject: tty: serial: 8250_core: allow to overwrite & export serial8250_startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OMAP version of the 8250 can actually use serial8250_startup() 1:1. However, it needs to be extended by a wake-up irq which should be requested & enabled at ->startup() time and disabled at ->shutdown() time. v2…v3: properly copy callbacks v1…v2: add shutdown callback Acked-by: Alan Cox Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 25 +++++++++++++++++++++++-- include/linux/serial_8250.h | 2 ++ include/linux/serial_core.h | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 90e0d5e2b7ee..872c020607a8 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1930,7 +1930,7 @@ static void serial8250_put_poll_char(struct uart_port *port, #endif /* CONFIG_CONSOLE_POLL */ -static int serial8250_startup(struct uart_port *port) +int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2181,8 +2181,16 @@ dont_test_tx_en: return 0; } +EXPORT_SYMBOL_GPL(serial8250_do_startup); -static void serial8250_shutdown(struct uart_port *port) +static int serial8250_startup(struct uart_port *port) +{ + if (port->startup) + return port->startup(port); + return serial8250_do_startup(port); +} + +void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2232,6 +2240,15 @@ static void serial8250_shutdown(struct uart_port *port) if (port->irq) serial_unlink_irq_chain(up); } +EXPORT_SYMBOL_GPL(serial8250_do_shutdown); + +static void serial8250_shutdown(struct uart_port *port) +{ + if (port->shutdown) + port->shutdown(port); + else + serial8250_do_shutdown(port); +} static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) { @@ -3466,6 +3483,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up) /* Possibly override set_termios call */ if (up->port.set_termios) uart->port.set_termios = up->port.set_termios; + if (up->port.startup) + uart->port.startup = up->port.startup; + if (up->port.shutdown) + uart->port.shutdown = up->port.shutdown; if (up->port.pm) uart->port.pm = up->port.pm; if (up->port.handle_break) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 6dd671765312..6fc9d7bee05e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -124,6 +124,8 @@ extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int setup_early_serial8250_console(char *cmdline); extern void
serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern int serial8250_do_startup(struct uart_port *port); +extern void serial8250_do_shutdown(struct uart_port *port); extern void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); extern int fsl8250_handle_irq(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb267b1fcd5..3bd7d55eebce 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -122,6 +122,8 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + int (*startup)(struct uart_port *port); + void (*shutdown)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); -- cgit v1.2.3 From 413fffc3a1db7f270afdf1ecb35c1edc013acc68 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 28 Aug 2014 11:22:23 +0530 Subject: cpufreq: Add support for per-policy driver data Drivers supporting multiple clusters or multiple 'struct cpufreq_policy' instances may need to keep per-policy data. If the core doesn't provide support for that, they might do it in the most unoptimized way: 'per-cpu' data. This patch adds another field in struct cpufreq_policy: 'driver_data'. It isn't accessed by the core and is for the driver's internal use only. Tested-by: Stephen Boyd Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7d1955afa62c..138336b6bb04 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -112,6 +112,9 @@ struct cpufreq_policy { spinlock_t transition_lock; wait_queue_head_t transition_wait; struct task_struct *transition_task; /* Task which is doing the transition */ + + /* For cpufreq driver's internal use */ + void *driver_data; }; /* Only for ACPI */ -- cgit v1.2.3 From da3dae54e4ff09886b9a19224c8d9556bb2ba096 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 9 Sep 2014 01:27:23 +0900 Subject: Documentation: Docbook: Fix generated DocBook/kernel-api.xml This patch fixes spelling typos found in DocBook/kernel-api.xml. Because the file is generated from the source comments, I have to fix the typos in the source code itself. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- block/blk-core.c | 8 ++++---- block/genhd.c | 2 +- include/linux/clk.h | 2 +- include/linux/kfifo.h | 2 +- ipc/util.c | 6 +++--- kernel/auditsc.c | 2 +- lib/bitmap.c | 4 ++-- lib/idr.c | 2 +- lib/vsprintf.c | 2 +- mm/filemap.c | 2 +- security/inode.c | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 6f8dba161bfe..ce5a389cea94 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -240,7 +240,7 @@ EXPORT_SYMBOL(blk_stop_queue); * this function. * * This function does not cancel any asynchronous activity arising - * out of elevator or throttling code. That would require elevaotor_exit() + * out of elevator or throttling code. That would require elevator_exit() * and blkcg_exit_queue() to be called with queue lock initialized. * */ @@ -930,7 +930,7 @@ static struct io_context *rq_ioc(struct bio *bio) * Get a free request from @q. This function may fail under memory * pressure or if @q is dead.
* - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1107,7 +1107,7 @@ rq_starved: * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this * function keeps retrying under memory pressure and fails iff @q is dead. * - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1238,7 +1238,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL(blk_make_request); /** - * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC + * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized * */ diff --git a/block/genhd.c b/block/genhd.c index 791f41943132..048e2cee43d2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1543,7 +1543,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) /** * disk_clear_events - synchronously check, clear and return pending events * @disk: disk to fetch and clear events from - * @mask: mask of events to be fetched and clearted + * @mask: mask of events to be fetched and cleared * * Disk events are synchronously checked and pending events in @mask * are cleared and returned. This ignores the block count. diff --git a/include/linux/clk.h b/include/linux/clk.h index fb5e097d8f72..afb44bfaf8d1 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -238,7 +238,7 @@ void clk_put(struct clk *clk); /** * devm_clk_put - "free" a managed clock source - * @dev: device used to acuqire the clock + * @dev: device used to acquire the clock * @clk: clock source acquired with devm_clk_get() * * Note: drivers must ensure that all clk_enable calls made on this diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 554fde3a3927..473b43678ad1 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -722,7 +722,7 @@ __kfifo_uint_must_check_helper( \ /** * kfifo_dma_out_finish - finish a DMA OUT operation * @fifo: address of the fifo to be used - * @len: number of bytes transferrd + * @len: number of bytes transferred * * This macro finish a DMA OUT operation. The out counter will be updated by * the len parameter. No error checking will be done. 
diff --git a/ipc/util.c b/ipc/util.c index 27d74e69fd57..d73b7af581e2 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -309,7 +309,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) /** * ipcget_new - create a new ipc object * @ns: ipc namespace - * @ids: ipc identifer set + * @ids: ipc identifier set * @ops: the actual creation routine to call * @params: its parameters * @@ -363,7 +363,7 @@ static int ipc_check_perms(struct ipc_namespace *ns, /** * ipcget_public - get an ipc object or create a new one * @ns: ipc namespace - * @ids: ipc identifer set + * @ids: ipc identifier set * @ops: the actual creation routine to call * @params: its parameters * @@ -669,7 +669,7 @@ out: /** * ipcget - Common sys_*get() code - * @ns: namsepace + * @ns: namespace * @ids: ipc identifier set * @ops: operations to be called on ipc object creation, permission checks * and further checks diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 21eae3c05ec0..7208c1df248d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2406,7 +2406,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, * @new: the new credentials * @old: the old (current) credentials * - * Record the aguments userspace sent to sys_capset for later printing by the + * Record the arguments userspace sent to sys_capset for later printing by the * audit system if applicable */ void __audit_log_capset(const struct cred *new, const struct cred *old) diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4fe8d2d..ed0f00c3da0a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -882,7 +882,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * read it, you're overqualified for your current job.) * * In other words, @orig is mapped onto (surjectively) @dst, - * using the the map { | the n-th bit of @relmap is the + * using the map { | the n-th bit of @relmap is the * m-th set bit of @relmap }. * * Any set bits in @orig above bit number W, where W is the @@ -930,7 +930,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * * Further lets say we use the following code, invoking * bitmap_fold() then bitmap_onto, as suggested above to - * avoid the possitility of an empty @dst result: + * avoid the possibility of an empty @dst result: * * unsigned long *tmp; // a temporary bitmap's bits * diff --git a/lib/idr.c b/lib/idr.c index 39158abebad1..460abfd1450b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -625,7 +625,7 @@ static void __idr_remove_all(struct idr *idp) * idr_destroy(). * * A typical clean-up sequence for objects stored in an idr tree will use - * idr_for_each() to free all objects, if necessay, then idr_destroy() to + * idr_for_each() to free all objects, if necessary, then idr_destroy() to * free up the id mappings and cached idr_layers. */ void idr_destroy(struct idr *idp) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6fe2c84eb055..ba3cd0a35640 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1937,7 +1937,7 @@ EXPORT_SYMBOL(sprintf); * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument - * is skiped. + * is skipped. * * The return value is the number of words(32bits) which would be generated for * the given input. diff --git a/mm/filemap.c b/mm/filemap.c index 65d44fd88c78..f092654cc1a3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -720,7 +720,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). 
* Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechananism between PageLocked pages and PageWriteback pages is shared. + * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * * The mb is necessary to enforce ordering between the clear_bit and the read diff --git a/security/inode.c b/security/inode.c index 43ce6e19015f..8e7ca62078ab 100644 --- a/security/inode.c +++ b/security/inode.c @@ -74,7 +74,7 @@ static struct file_system_type fs_type = { * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here). If an error occurs, the function will return - * the erorr value (via ERR_PTR). + * the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. -- cgit v1.2.3 From 8ca28610e5e37193cd61fefa4310941e28de10ca Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Thu, 7 Aug 2014 15:14:06 +0200 Subject: mmc: include linux/types.h for bool definition in atmel-mci.h This patch adds an include of linux/types.h to make sure bool is defined before it is used in this header file. Signed-off-by: Hans-Christian Egtvedt Acked-by: Ludovic Desroches Signed-off-by: Ulf Hansson --- include/linux/atmel-mci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 4c7a4b2104bf..91b77f8d495d 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -1,6 +1,8 @@ #ifndef __LINUX_ATMEL_MCI_H #define __LINUX_ATMEL_MCI_H +#include <linux/types.h> + #define ATMCI_MAX_NR_SLOTS 2 /** -- cgit v1.2.3 From b3683994843a0ede0e19daccd1ac32a46b21eb39 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Wed, 13 Aug 2014 13:34:01 +0800 Subject: mmc: Correct the value of MMC_NUM_PHY_PARTITION An eMMC card can support up to 7 physical partitions, including 2 boot, 1 RPMB and 4 GPs. Change MMC_NUM_PHY_PARTITION from 6 to 7, which is the correct value. Signed-off-by: Yi Sun Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d424b9de3aff..bde5147a4221 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -214,11 +214,12 @@ enum mmc_blk_status { }; /* The number of MMC physical partitions. These consist of: - * boot partitions (2), general purpose partitions (4) in MMC v4.4. + * boot partitions (2), general purpose partitions (4) and + * RPMB partition (1) in MMC v4.4. */ #define MMC_NUM_BOOT_PARTITION 2 #define MMC_NUM_GP_PARTITION 4 -#define MMC_NUM_PHY_PARTITION 6 +#define MMC_NUM_PHY_PARTITION 7 #define MAX_MMC_PART_NAME_LEN 20 /* -- cgit v1.2.3 From 3d705d14fe4c72be83bae1610680e209ee226b9d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 19 Aug 2014 10:45:51 +0200 Subject: mmc: implement Driver Stage Register handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some eMMC and SD cards implement a DSR register that allows tuning the rise/fall times and drive strength of the CMD and DATA outputs. The values to use depend on the card in use and the host. It might be necessary to reduce the drive strength to prevent voltage peaks above the host's specification.
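For illustration, a board could use the 'dsr' binding described below like this (a made-up device-tree snippet; the node label and the 0x0404 value are examples only, not recommendations):

&mmc0 {
	/* program the card's optional 16-bit DSR during init */
	dsr = <0x0404>;
};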
Implement a 'dsr' devicetree property that allows specifying the value to set the DSR to. For non-dt setups the new members of mmc_host can be set by board code. This patch was initially authored by Sascha Hauer. It contains improvements authored by Markus Niebel and Uwe Kleine-König. Signed-off-by: Sascha Hauer Signed-off-by: Markus Niebel Signed-off-by: Uwe Kleine-König Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/mmc.txt | 2 ++ drivers/mmc/core/host.c | 8 ++++++++ drivers/mmc/core/mmc.c | 8 ++++++++ drivers/mmc/core/mmc_ops.c | 20 ++++++++++++++++++++ drivers/mmc/core/mmc_ops.h | 1 + drivers/mmc/core/sd.c | 8 ++++++++ include/linux/mmc/card.h | 3 ++- include/linux/mmc/host.h | 3 +++ 8 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index 431716e37a39..b52628b18a53 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -40,6 +40,8 @@ Optional properties: - mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported - mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported - mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported +- dsr: Value the card's (optional) Driver Stage Register (DSR) should be + programmed with. Valid range: [0 .. 0xffff]. *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line polarity properties, we have to fix the meaning of the "normal" and "inverted" diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 95cceae96944..d572b2beb65a 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -452,6 +452,14 @@ int mmc_of_parse(struct mmc_host *host) if (of_find_property(np, "mmc-hs400-1_2v", &len)) host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR; + host->dsr_req = !of_property_read_u32(np, "dsr", &host->dsr); + if (host->dsr_req && (host->dsr & ~0xffff)) { + dev_err(host->parent, + "device tree specified broken value for DSR: 0x%x, ignoring\n", + host->dsr); + host->dsr_req = 0; + } + return 0; out: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 1eda8dd8c867..31c43165f8bc 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -162,6 +162,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -1271,6 +1272,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, goto free_card; } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. */ diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index f51b5ba3bbea..ba0275e90617 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -93,6 +93,26 @@ int mmc_deselect_cards(struct mmc_host *host) return _mmc_select_card(host, NULL); } +/* + * Write the value specified in the device tree or board code into the optional + * 16 bit Driver Stage Register. This can be used to tune raise/fall times and + * drive strength of the DAT and CMD outputs.
The actual meaning of a given + * value is hardware dependant. + * The presence of the DSR register can be determined from the CSD register, + * bit 76. + */ +int mmc_set_dsr(struct mmc_host *host) +{ + struct mmc_command cmd = {0}; + + cmd.opcode = MMC_SET_DSR; + + cmd.arg = (host->dsr << 16) | 0xffff; + cmd.flags = MMC_RSP_NONE | MMC_CMD_AC; + + return mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); +} + int mmc_go_idle(struct mmc_host *host) { int err; diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index 80ae9f4e0293..390dac665b2a 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -14,6 +14,7 @@ int mmc_select_card(struct mmc_card *card); int mmc_deselect_cards(struct mmc_host *host); +int mmc_set_dsr(struct mmc_host *host); int mmc_go_idle(struct mmc_host *host); int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr); int mmc_all_send_cid(struct mmc_host *host, u32 *cid); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 0c44510bf717..25913889cbaa 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -127,6 +127,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -953,6 +954,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_decode_cid(card); } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. 
*/ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index bde5147a4221..0ea795f5feb9 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -42,7 +42,8 @@ struct mmc_csd { unsigned int read_partial:1, read_misalign:1, write_partial:1, - write_misalign:1; + write_misalign:1, + dsr_imp:1; }; struct mmc_ext_csd { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7960424d0bc0..4cbf61476999 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -365,6 +365,9 @@ struct mmc_host { unsigned int slotno; /* used for sdio acpi binding */ + int dsr_req; /* DSR value is valid */ + u32 dsr; /* optional driver stage (DSR) value */ + unsigned long private[0] ____cacheline_aligned; }; -- cgit v1.2.3 From 384b2cbd56a02efb16358ed7c0c039e4afca5ed0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 19:58:48 -0700 Subject: mmc: tmio: care about DMA tx/rx addr offset Basically, the SD_BUF0 Tx/Rx addresses are the same on a normal TMIO controller, but they differ on the Renesas R-Car SDHI controller if it uses DMAC (the Rx address needs 0x2000 added to the Tx address). This patch adds a new .dma_rx_offset field and takes care of it. Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Acked-by: Laurent Pinchart Acked-by: Ben Dooks Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 3 +++ drivers/mmc/host/tmio_mmc_dma.c | 2 +- include/linux/mfd/tmio.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 2dafe28c8df9..f0818cd0242c 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -39,6 +39,7 @@ struct sh_mobile_sdhi_of_data { unsigned long tmio_flags; unsigned long capabilities; unsigned long capabilities2; + dma_addr_t dma_rx_offset; }; static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { @@ -56,6 +57,7 @@ static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, + .dma_rx_offset = 0x2000, }; static const struct of_device_id sh_mobile_sdhi_of_match[] = { @@ -228,6 +230,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->flags |= of_data->tmio_flags; mmc_data->capabilities |= of_data->capabilities; mmc_data->capabilities2 |= of_data->capabilities2; + dma_priv->dma_rx_offset = of_data->dma_rx_offset; } /* SD control register space size is 0x100, 0x200 for bus_shift=1 */ diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index eb8f1d5c34b1..bd646b041085 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -312,7 +312,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat if (pdata->dma->chan_priv_rx) cfg.slave_id = pdata->dma->slave_id_rx; cfg.direction = DMA_DEV_TO_MEM; - cfg.src_addr = cfg.dst_addr; + cfg.src_addr = cfg.dst_addr + pdata->dma->dma_rx_offset; cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.dst_addr = 0; ret = dmaengine_slave_config(host->chan_rx, &cfg); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8f6f2e91e7ae..777e29b1ad0f 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -96,6 +96,7 @@ struct tmio_mmc_dma { int slave_id_tx; int slave_id_rx; int alignment_shift; + dma_addr_t
dma_rx_offset; bool (*filter)(struct dma_chan *chan, void *arg); }; -- cgit v1.2.3 From b8d11962c2d83c984d5afd091e5b725ad2fd5607 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:25 -0700 Subject: mmc: tmio: control multiple block transfer mode Renesas SDHI has "Multiple Block Transfer Mode" settings in the SD_CMD register which control CMD12 automatically. This patch takes care of it, because the automatic CMD12 is not needed for CMD53 (= SD_IO_RW_EXTENDED). [Kuninori Morimoto: tidied up for upstreaming; enabled this flag for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 10 ++++++++++ include/linux/mfd/tmio.h | 6 ++++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index f0818cd0242c..4727586b063e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -225,6 +225,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_SDIO_IRQ; + /* + * All SDHI have CMD12 controll bit + */ + mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 3d0ad737abea..c2804827be9f 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -44,6 +44,7 @@ #include #include #include +#include <linux/mmc/sdio.h> #include #include #include @@ -310,6 +311,7 @@ static void tmio_mmc_done_work(struct work_struct *work) #define TRANSFER_READ 0x1000 #define TRANSFER_MULTI 0x2000 #define SECURITY_CMD 0x4000 +#define NO_CMD12_ISSUE 0x4000 /* TMIO_MMC_HAVE_CMD12_CTRL */ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd) { @@ -346,6 +348,14 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command if (data->blocks > 1) { sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100); c |= TRANSFER_MULTI; + + /* + * Disable auto CMD12 at IO_RW_EXTENDED when + * multiple block transfer + */ + if ((host->pdata->flags & TMIO_MMC_HAVE_CMD12_CTRL) && + (cmd->opcode == SD_IO_RW_EXTENDED)) + c |= NO_CMD12_ISSUE; } if (data->flags & MMC_DATA_READ) c |= TRANSFER_READ; diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 777e29b1ad0f..7432d95b08e2 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -83,6 +83,12 @@ */ #define TMIO_MMC_HAVE_HIGH_REG (1 << 6) +/* + * Some controllers have CMD12 automatically + * issue/non-issue register + */ +#define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 6b98757e53cb0e93b02db4067c14afcb32c90615 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:52 -0700 Subject: mmc: tmio: add TMIO_MMC_SDIO_STATUS_QUIRK Renesas R-Car SDHI should set reserved bits in the CTL_SDIO_STATUS register when writing.
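As a hedged sketch of the reserved-bits rule just described (the 0x0006 mask mirrors the "sdio_status |= 6" in the patch below, but treat the exact register layout as an assumption):

/* illustrative only: ack SDIO IRQs while writing the reserved bits as 1 */
static void example_ack_sdio_status(struct tmio_mmc_host *host, u16 status)
{
	u16 val = status & ~TMIO_SDIO_MASK_ALL;	/* clear handled IRQ bits */

	val |= 0x0006;	/* assumed reserved-bit mask, per the quirk */
	sd_ctrl_write16(host, CTL_SDIO_STATUS, val);
}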
This patch adds a new TMIO_MMC_SDIO_STATUS_QUIRK flag for this purpose. [Kuninori Morimoto: tidied up for upstreaming; enabled this flag for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 7 ++++++- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 4727586b063e..ebcfc659b799 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -230,6 +230,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + /* + * All SDHI need SDIO_INFO1 reserved bit + */ + mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index c2804827be9f..c5ffa92ccb18 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -665,6 +665,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; unsigned int ireg, status; + unsigned int sdio_status; if (!(pdata->flags & TMIO_MMC_SDIO_IRQ)) return IRQ_HANDLED; @@ -672,7 +673,11 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) status = sd_ctrl_read16(host, CTL_SDIO_STATUS); ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask; - sd_ctrl_write16(host, CTL_SDIO_STATUS, status & ~TMIO_SDIO_MASK_ALL); + sdio_status = status & ~TMIO_SDIO_MASK_ALL; + if (pdata->flags & TMIO_MMC_SDIO_STATUS_QUIRK) + sdio_status |= 6; + + sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status); if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ) mmc_signal_sdio_irq(mmc); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 7432d95b08e2..a7493ae01b58 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -89,6 +89,11 @@ */ #define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) +/* + * Some controllers needs to set 1 on SDIO status reserved bits + */ +#define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From e85dd04ea8c8d32ba8eae278959d28df34338e9d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 20:01:54 -0700 Subject: mmc: tmio: remove Renesas specific #ifdef This patch adds a new TMIO_MMC_HAVE_CTL_DMA_REG flag and removes the Renesas-specific #ifdef from the tmio driver. Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_dma.c | 6 ++---- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index ebcfc659b799..a077da02bb8e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -235,6 +235,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |=
TMIO_MMC_SDIO_STATUS_QUIRK; + /* + * All SDHI have DMA control register + */ + mmc_data->flags |= TMIO_MMC_HAVE_CTL_DMA_REG; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index bd646b041085..7d077388b9eb 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -28,10 +28,8 @@ void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) if (!host->chan_tx || !host->chan_rx) return; -#if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) - /* Switch DMA mode on or off - SuperH specific? */ - sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); -#endif + if (host->pdata->flags & TMIO_MMC_HAVE_CTL_DMA_REG) + sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); } void tmio_mmc_abort_dma(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index a7493ae01b58..adcb0cdb2fdb 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -94,6 +94,11 @@ */ #define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) +/* + * Some controllers have DMA enable/disable register + */ +#define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From da29fe2bf573f0ae56fdc2e790387cb73fc8c6f8 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:03:00 -0700 Subject: mmc: tmio: add actual clock support as option Some controllers support an actual clock setting on SD_CLK_CTRL :: DIV[7:0]. Renesas SH-Mobile SDHI doesn't support it, but Renesas R-Car SDHI does. This patch adds a new TMIO_MMC_CLK_ACTUAL flag for it.
[Kuninori Morimoto: tidied up for upstreaming] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++-- drivers/mmc/host/tmio_mmc_pio.c | 5 +++++ include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index a077da02bb8e..d5d493719491 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -49,12 +49,14 @@ static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { }; static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, }; static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, .dma_rx_offset = 0x2000, }; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 7cfe939992de..ba454131f9a8 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -159,6 +159,11 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, for (clock = host->mmc->f_min, clk = 0x80000080; new_clock >= (clock<<1); clk >>= 1) clock <<= 1; + + /* 1/1 clock is option */ + if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && + ((clk >> 22) & 0x1)) + clk |= 0xff; } if (host->set_clk_div) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index adcb0cdb2fdb..90436d523e5e 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -99,6 +99,11 @@ */ #define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) +/* + * Some controllers allows to set SDx actual clock + */ +#define TMIO_MMC_CLK_ACTUAL (1 << 10) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 51da2240906cb94e8f6ba55e403b6206df6fb2dd Mon Sep 17 00:00:00 2001 From: Yuvaraj CD Date: Fri, 22 Aug 2014 19:17:50 +0530 Subject: mmc: dw_mmc: use mmc_regulator_get_supply to handle regulators This patch makes use of mmc_regulator_get_supply() to handle the vmmc and vqmmc regulators. It also moves the code handling these regulators to dw_mci_set_ios(). The vmmc and vqmmc regulators are turned on during MMC_POWER_UP and MMC_POWER_ON, and turned off during MMC_POWER_OFF.
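For reference, a condensed sketch of the pattern this patch adopts (error handling trimmed, so this is not the actual dw_mmc code):

/* probe: fetch vmmc/vqmmc once; absent supplies become ERR_PTR values */
ret = mmc_regulator_get_supply(mmc);
if (ret == -EPROBE_DEFER)
	return ret;

/* set_ios: power the supplies up and down together with the bus */
switch (ios->power_mode) {
case MMC_POWER_UP:
	if (!IS_ERR(mmc->supply.vmmc))
		ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
	if (!IS_ERR(mmc->supply.vqmmc))
		ret = regulator_enable(mmc->supply.vqmmc);
	break;
case MMC_POWER_OFF:
	if (!IS_ERR(mmc->supply.vmmc))
		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
	if (!IS_ERR(mmc->supply.vqmmc))
		regulator_disable(mmc->supply.vqmmc);
	break;
}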
Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 72 ++++++++++++++++++++++------------------------ include/linux/mmc/dw_mmc.h | 2 +- 2 files changed, 36 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 7f227e964265..aadb0d6aa63f 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -936,6 +936,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct dw_mci_slot *slot = mmc_priv(mmc); const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 regs; + int ret; switch (ios->bus_width) { case MMC_BUS_WIDTH_4: @@ -974,12 +975,38 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_UP: + if (!IS_ERR(mmc->supply.vmmc)) { + ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, + ios->vdd); + if (ret) { + dev_err(slot->host->dev, + "failed to enable vmmc regulator\n"); + /*return, if failed turn on vmmc*/ + return; + } + } + if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) { + ret = regulator_enable(mmc->supply.vqmmc); + if (ret < 0) + dev_err(slot->host->dev, + "failed to enable vqmmc regulator\n"); + else + slot->host->vqmmc_enabled = true; + } set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags); regs = mci_readl(slot->host, PWREN); regs |= (1 << slot->id); mci_writel(slot->host, PWREN, regs); break; case MMC_POWER_OFF: + if (!IS_ERR(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); + + if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) { + regulator_disable(mmc->supply.vqmmc); + slot->host->vqmmc_enabled = false; + } + regs = mci_readl(slot->host, PWREN); regs &= ~(1 << slot->id); mci_writel(slot->host, PWREN, regs); @@ -2110,7 +2137,13 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) mmc->f_max = freq[1]; } - mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + /*if there are external regulators, get them*/ + ret = mmc_regulator_get_supply(mmc); + if (ret == -EPROBE_DEFER) + goto err_setup_bus; + + if (!mmc->ocr_avail) + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; if (host->pdata->caps) mmc->caps = host->pdata->caps; @@ -2176,7 +2209,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) err_setup_bus: mmc_free_host(mmc); - return -EINVAL; + return ret; } static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) @@ -2469,24 +2502,6 @@ int dw_mci_probe(struct dw_mci *host) } } - host->vmmc = devm_regulator_get_optional(host->dev, "vmmc"); - if (IS_ERR(host->vmmc)) { - ret = PTR_ERR(host->vmmc); - if (ret == -EPROBE_DEFER) - goto err_clk_ciu; - - dev_info(host->dev, "no vmmc regulator found: %d\n", ret); - host->vmmc = NULL; - } else { - ret = regulator_enable(host->vmmc); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(host->dev, - "regulator_enable fail: %d\n", ret); - goto err_clk_ciu; - } - } - host->quirks = host->pdata->quirks; spin_lock_init(&host->lock); @@ -2630,8 +2645,6 @@ err_workqueue: err_dmaunmap: if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); err_clk_ciu: if (!IS_ERR(host->ciu_clk)) @@ -2667,9 +2680,6 @@ void dw_mci_remove(struct dw_mci *host) if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); - if (!IS_ERR(host->ciu_clk)) clk_disable_unprepare(host->ciu_clk); @@ -2686,9 +2696,6 @@ EXPORT_SYMBOL(dw_mci_remove); */ 
int dw_mci_suspend(struct dw_mci *host) { - if (host->vmmc) - regulator_disable(host->vmmc); - return 0; } EXPORT_SYMBOL(dw_mci_suspend); @@ -2697,15 +2704,6 @@ int dw_mci_resume(struct dw_mci *host) { int i, ret; - if (host->vmmc) { - ret = regulator_enable(host->vmmc); - if (ret) { - dev_err(host->dev, - "failed to enable regulator: %d\n", ret); - return ret; - } - } - if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) { ret = -ENODEV; return ret; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 29ce014ab421..84e2827d0f0b 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -188,7 +188,7 @@ struct dw_mci { /* Workaround flags */ u32 quirks; - struct regulator *vmmc; /* Power regulator */ + bool vqmmc_enabled; unsigned long irq_flags; /* IRQ flags */ int irq; }; -- cgit v1.2.3 From 0173055842cd1d9ed3984e70891c22dbf2f29372 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 22 Aug 2014 19:17:51 +0530 Subject: mmc: dw_mmc: Support voltage changes For UHS cards we need the ability to switch voltages from 3.3V to 1.8V. Add support to the dw_mmc driver to handle this. Note that dw_mmc needs a little bit of extra code since the interface needs a special bit programmed to the CMD register while CMD11 is progressing. This means adding a few extra states to the state machine to track. Signed-off-by: Doug Anderson Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 137 ++++++++++++++++++++++++++++++++++++++++++--- drivers/mmc/host/dw_mmc.h | 5 +- include/linux/mmc/dw_mmc.h | 2 + 3 files changed, 134 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index aadb0d6aa63f..23719249182b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -234,10 +235,13 @@ err: } #endif /* defined(CONFIG_DEBUG_FS) */ +static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg); + static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) { struct mmc_data *data; struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 cmdr; cmd->error = -EINPROGRESS; @@ -253,6 +257,34 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) else if (cmd->opcode != MMC_SEND_STATUS && cmd->data) cmdr |= SDMMC_CMD_PRV_DAT_WAIT; + if (cmd->opcode == SD_SWITCH_VOLTAGE) { + u32 clk_en_a; + + /* Special bit makes CMD11 not die */ + cmdr |= SDMMC_CMD_VOLT_SWITCH; + + /* Change state to continue to handle CMD11 weirdness */ + WARN_ON(slot->host->state != STATE_SENDING_CMD); + slot->host->state = STATE_SENDING_CMD11; + + /* + * We need to disable low power mode (automatic clock stop) + * while doing voltage switch so we don't confuse the card, + * since stopping the clock is a specific part of the UHS + * voltage change dance. + * + * Note that low power mode (SDMMC_CLKEN_LOW_PWR) will be + * unconditionally turned back on in dw_mci_setup_bus() if it's + * ever called with a non-zero clock. That shouldn't happen + * until the voltage change is all done. 
+ */ + clk_en_a = mci_readl(host, CLKENA); + clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id); + mci_writel(host, CLKENA, clk_en_a); + mci_send_cmd(slot, SDMMC_CMD_UPD_CLK | + SDMMC_CMD_PRV_DAT_WAIT, 0); + } + if (cmd->flags & MMC_RSP_PRESENT) { /* We expect a response, so set this bit */ cmdr |= SDMMC_CMD_RESP_EXP; @@ -775,11 +807,15 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) unsigned int clock = slot->clock; u32 div; u32 clk_en_a; + u32 sdmmc_cmd_bits = SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT; + + /* We must continue to set bit 28 in CMD until the change is complete */ + if (host->state == STATE_WAITING_CMD11_DONE) + sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH; if (!clock) { mci_writel(host, CLKENA, 0); - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); } else if (clock != host->current_speed || force_clkinit) { div = host->bus_hz / clock; if (host->bus_hz % clock && host->bus_hz > clock) @@ -803,15 +839,13 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKSRC, 0); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* set clock to desired speed */ mci_writel(host, CLKDIV, div); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* enable clock; only low power if no SDIO */ clk_en_a = SDMMC_CLKEN_ENABLE << slot->id; @@ -820,8 +854,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKENA, clk_en_a); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* keep the clock with reflecting clock dividor */ slot->__clk_old = clock << div; @@ -897,6 +930,17 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, slot->mrq = mrq; + if (host->state == STATE_WAITING_CMD11_DONE) { + dev_warn(&slot->mmc->class_dev, + "Voltage change didn't complete\n"); + /* + * this case isn't expected to happen, so we can + * either crash here or just try to continue on + * in the closest possible state + */ + host->state = STATE_IDLE; + } + if (host->state == STATE_IDLE) { host->state = STATE_SENDING_CMD; dw_mci_start_request(host, slot); @@ -973,6 +1017,9 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Slot specific timing and width adjustment */ dw_mci_setup_bus(slot, false); + if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0) + slot->host->state = STATE_IDLE; + switch (ios->power_mode) { case MMC_POWER_UP: if (!IS_ERR(mmc->supply.vmmc)) { @@ -1016,6 +1063,59 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } +static int dw_mci_card_busy(struct mmc_host *mmc) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + u32 status; + + /* + * Check the busy bit which is low when DAT[3:0] + * (the data lines) are 0000 + */ + status = mci_readl(slot->host, STATUS); + + return !!(status & SDMMC_STATUS_BUSY); +} + +static int dw_mci_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; + u32 uhs; + u32 v18 = SDMMC_UHS_18V << slot->id; + int min_uv, max_uv; + int ret; + + /* + * Program the voltage. Note that some instances of dw_mmc may use + * the UHS_REG for this. 
For other instances (like exynos) the UHS_REG + * does no harm but you need to set the regulator directly. Try both. + */ + uhs = mci_readl(host, UHS_REG); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) { + min_uv = 2700000; + max_uv = 3600000; + uhs &= ~v18; + } else { + min_uv = 1700000; + max_uv = 1950000; + uhs |= v18; + } + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, min_uv, max_uv); + + if (ret) { + dev_err(&mmc->class_dev, + "Regulator set error %d: %d - %d\n", + ret, min_uv, max_uv); + return ret; + } + } + mci_writel(host, UHS_REG, uhs); + + return 0; +} + static int dw_mci_get_ro(struct mmc_host *mmc) { int read_only; @@ -1158,6 +1258,9 @@ static const struct mmc_host_ops dw_mci_ops = { .get_cd = dw_mci_get_cd, .enable_sdio_irq = dw_mci_enable_sdio_irq, .execute_tuning = dw_mci_execute_tuning, + .card_busy = dw_mci_card_busy, + .start_signal_voltage_switch = dw_mci_switch_voltage, + }; static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) @@ -1181,7 +1284,11 @@ static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) dw_mci_start_request(host, slot); } else { dev_vdbg(host->dev, "list empty\n"); - host->state = STATE_IDLE; + + if (host->state == STATE_SENDING_CMD11) + host->state = STATE_WAITING_CMD11_DONE; + else + host->state = STATE_IDLE; } spin_unlock(&host->lock); @@ -1292,8 +1399,10 @@ static void dw_mci_tasklet_func(unsigned long priv) switch (state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: if (!test_and_clear_bit(EVENT_CMD_COMPLETE, &host->pending_events)) @@ -1894,6 +2003,14 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) } if (pending) { + /* Check volt switch first, since it can look like an error */ + if ((host->state == STATE_SENDING_CMD11) && + (pending & SDMMC_INT_VOLT_SWITCH)) { + mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH); + pending &= ~SDMMC_INT_VOLT_SWITCH; + dw_mci_cmd_interrupt(host, pending); + } + if (pending & DW_MCI_CMD_ERROR_FLAGS) { mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS); host->cmd_status = pending; @@ -1999,7 +2116,9 @@ static void dw_mci_work_routine_card(struct work_struct *work) switch (host->state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: mrq->cmd->error = -ENOMEDIUM; if (!mrq->data) diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 08fd956d81f3..01b99e8a9190 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -99,6 +99,7 @@ #define SDMMC_INT_HLE BIT(12) #define SDMMC_INT_FRUN BIT(11) #define SDMMC_INT_HTO BIT(10) +#define SDMMC_INT_VOLT_SWITCH BIT(10) /* overloads bit 10! 
*/ #define SDMMC_INT_DRTO BIT(9) #define SDMMC_INT_RTO BIT(8) #define SDMMC_INT_DCRC BIT(7) @@ -113,6 +114,7 @@ /* Command register defines */ #define SDMMC_CMD_START BIT(31) #define SDMMC_CMD_USE_HOLD_REG BIT(29) +#define SDMMC_CMD_VOLT_SWITCH BIT(28) #define SDMMC_CMD_CCS_EXP BIT(23) #define SDMMC_CMD_CEATA_RD BIT(22) #define SDMMC_CMD_UPD_CLK BIT(21) @@ -130,6 +132,7 @@ /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) #define SDMMC_STATUS_DMA_REQ BIT(31) +#define SDMMC_STATUS_BUSY BIT(9) /* FIFOTH register defines */ #define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ ((r) & 0xFFF) << 16 | \ @@ -150,7 +153,7 @@ #define SDMMC_GET_VERID(x) ((x) & 0xFFFF) /* Card read threshold */ #define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x)) - +#define SDMMC_UHS_18V BIT(0) /* All ctrl reset bits */ #define SDMMC_CTRL_ALL_RESET_FLAGS \ (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET) diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 84e2827d0f0b..001366927cf4 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -26,6 +26,8 @@ enum dw_mci_state { STATE_DATA_BUSY, STATE_SENDING_STOP, STATE_DATA_ERROR, + STATE_SENDING_CMD11, + STATE_WAITING_CMD11_DONE, }; enum { -- cgit v1.2.3 From e99783a45220a2c5f5a598e0e81213ecf2dbcf2f Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Sat, 30 Aug 2014 12:40:40 +0900 Subject: mmc: sdhci: handle busy-end interrupt during command It is fully legal for a controller to start handling the busy-end interrupt before it has signaled that the command has completed. So make sure we do things in the proper order, or else the command interrupt is ignored, which can cause unexpected operations. This was found on some Toshiba eMMC devices with the warning below: "mmc0: Got command interrupt 0x00000001 even though no command operation was in progress." This issue has also been reported by Youssef TRIKI: it is not specific to Toshiba devices, and happens with eMMC devices as well as SD cards which support Auto-CMD12 rather than CMD23. Also, a similar patch was submitted by: Gwendal Grignou Changes since v1: Fixed a conflict with the next branch of git.linaro.org/people/ulf.hansson/mmc.git and tested that the issue is fixed again.
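In pseudocode, the ordering fix amounts to a small handshake (simplified from the patch below; busy_handle is the new flag):

/* command-complete IRQ: if busy-end hasn't fired yet, wait for it */
if (!host->busy_handle) {
	host->busy_handle = 1;	/* busy-end IRQ will finish the command */
	return;
}
sdhci_finish_command(host);

/* busy-end (DATA_END) IRQ: finish only if command-complete came first */
if (host->busy_handle)
	sdhci_finish_command(host);
else
	host->busy_handle = 1;	/* command-complete IRQ will finish it */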
Signed-off-by: Hankyung Yu Signed-off-by: Chanho Min Tested-by: Youssef TRIKI Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 17 +++++++++++++++-- include/linux/mmc/sdhci.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b5e22b8c4885..335cdf3b5224 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1016,6 +1016,7 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) mod_timer(&host->timer, timeout); host->cmd = cmd; + host->busy_handle = 0; sdhci_prepare_data(host, cmd); @@ -2261,8 +2262,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) if (host->cmd->data) DBG("Cannot wait for busy signal when also " "doing a data transfer"); - else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ)) + else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ) + && !host->busy_handle) { + /* Mark that command complete before busy is ended */ + host->busy_handle = 1; return; + } /* The controller does not support the end-of-busy IRQ, * fall through and take the SDHCI_INT_RESPONSE */ @@ -2330,7 +2335,15 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) return; } if (intmask & SDHCI_INT_DATA_END) { - sdhci_finish_command(host); + /* + * Some cards handle busy-end interrupt + * before the command completed, so make + * sure we do things in the proper order. + */ + if (host->busy_handle) + sdhci_finish_command(host); + else + host->busy_handle = 1; return; } } diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 09ebe57d5ce9..0aa85ca0c788 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -146,6 +146,7 @@ struct sdhci_host { struct mmc_command *cmd; /* Current command */ struct mmc_data *data; /* Current data request */ unsigned int data_early:1; /* Data finished before cmd */ + unsigned int busy_handle:1; /* Handling the order of Busy-end */ struct sg_mapping_iter sg_miter; /* SG state for PIO */ unsigned int blocks; /* remaining PIO blocks */ -- cgit v1.2.3 From 2e47e84245adcb1b3872210678b6146f674fb3ff Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 2 Sep 2014 19:08:53 -0700 Subject: mmc: Add .multi_io_quirk callback for multi I/O HW bug Historically, we have been using MMC_CAP* to handle host HW issues and currently the block layer uses MMC_CAP2_NO_MULTI_READ flag for a multi I/O HW bug workaround. There are a few tweaks needed to make MMC_CAP2_NO_MULTI_READ suit all situations. Therefore let's add an optional host ops callback to enable host drivers to return the number of blocks they allow per request. In a future patch, when host drivers have converted to the new callback, MMC_CAP2_NO_MULTI_READ shall be removed.
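For illustration, a host driver with such a HW bug could implement the new callback like this (a hypothetical driver; the bug and the numbers are made up):

/* hypothetical controller: multi-block reads are unreliable */
static int example_multi_io_quirk(struct mmc_card *card,
				  unsigned int direction, int blk_size)
{
	if (direction == MMC_DATA_READ)
		return 1;	/* force single-block reads */

	return blk_size;	/* writes are fine as-is */
}

static const struct mmc_host_ops example_ops = {
	/* ... other callbacks ... */
	.multi_io_quirk	= example_multi_io_quirk,
};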
Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 10 ++++++++++ include/linux/mmc/host.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ede41f05c392..adab9038f6f7 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1402,6 +1402,16 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && rq_data_dir(req) == READ) brq->data.blocks = 1; + + /* + * Some controllers have HW issues while operating + * in multiple I/O mode + */ + if (card->host->ops->multi_io_quirk) + brq->data.blocks = card->host->ops->multi_io_quirk(card, + (rq_data_dir(req) == READ) ? + MMC_DATA_READ : MMC_DATA_WRITE, + brq->data.blocks); } if (brq->data.blocks > 1 || do_rel_wr) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4cbf61476999..10e2bd6985ae 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -139,6 +139,13 @@ struct mmc_host_ops { int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); + + /* + * Optional callback to support controllers with HW issues for multiple + * I/O. Returns the number of supported blocks for the request. + */ + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; struct mmc_card; -- cgit v1.2.3 From bbf0208d39121bd8873b032459cb2b5f35e14593 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:45:25 -0700 Subject: mmc: use .multi_io_quirk on tmio_mmc Now tmio_mmc can use the .multi_io_quirk callback instead of the MMC_CAP2_NO_MULTI_READ flag, so let's use it.
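For illustration, board code could route such a quirk through the new tmio platform-data hook like this (a made-up example, not taken from any board file):

/* hypothetical board: cap multi-block reads, leave writes alone */
static int board_multi_io_quirk(struct mmc_card *card,
				unsigned int direction, int blk_size)
{
	return (direction == MMC_DATA_READ) ? 1 : blk_size;
}

static struct tmio_mmc_data board_tmio_data = {
	.multi_io_quirk	= board_multi_io_quirk,
};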
Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_pio.c | 13 +++++++++++++ include/linux/mfd/tmio.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index ba454131f9a8..ff5ff0f725c9 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -970,12 +970,25 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) return ret; } +static int tmio_multi_io_quirk(struct mmc_card *card, + unsigned int direction, int blk_size) +{ + struct tmio_mmc_host *host = mmc_priv(card->host); + struct tmio_mmc_data *pdata = host->pdata; + + if (pdata->multi_io_quirk) + return pdata->multi_io_quirk(card, direction, blk_size); + + return blk_size; +} + static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, .get_cd = mmc_gpio_get_cd, .enable_sdio_irq = tmio_mmc_enable_sdio_irq, + .multi_io_quirk = tmio_multi_io_quirk, }; static int tmio_mmc_init_ocr(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 90436d523e5e..57388171610d 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,8 @@ struct tmio_mmc_data { /* clock management callbacks */ int (*clk_enable)(struct platform_device *pdev, unsigned int *f); void (*clk_disable)(struct platform_device *pdev); + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; /* -- cgit v1.2.3 From 0abb71feb228ddbd17e0dfa13216541e036bb549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:46:49 -0700 Subject: mmc: remove MMC_CAP2_NO_MULTI_READ flags Now, mmc framework uses multi_io_quirk for I/O HW bug workaround. 
MMC_CAP2_NO_MULTI_READ flag is no longer needed Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 5 ----- include/linux/mmc/host.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index adab9038f6f7..413f984cb76d 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1398,11 +1398,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (disable_multi) brq->data.blocks = 1; - /* Some controllers can't do multiblock reads due to hw bugs */ - if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && - rq_data_dir(req) == READ) - brq->data.blocks = 1; - /* * Some controllers have HW issues while operating * in multiple I/O mode diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 10e2bd6985ae..797ae657dc3d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -272,7 +272,6 @@ struct mmc_host { #define MMC_CAP2_BOOTPART_NOACC (1 << 0) /* Boot partition no access */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ -#define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ -- cgit v1.2.3 From 9d2fa2428ae149ba3a5b7a4ceb0a9e11f1882b3b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Aug 2014 13:00:51 +0200 Subject: mmc: slot-gpio: add gpiod variant to get wp GPIO This makes it possible to get the write protect (read only) GPIO line from a GPIO descriptor. Written to exactly mirror the card detect function. Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 48 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/slot-gpio.h | 3 +++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 908c2b29e79f..e3fce4493fab 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -325,6 +325,54 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, } EXPORT_SYMBOL(mmc_gpiod_request_cd); +/** + * mmc_gpiod_request_ro - request a gpio descriptor for write protection + * @host: mmc host + * @con_id: function within the GPIO consumer + * @idx: index of the GPIO to obtain in the consumer + * @override_active_level: ignore %GPIO_ACTIVE_LOW flag + * @debounce: debounce time in microseconds + * + * Use this function in place of mmc_gpio_request_ro() to use the GPIO + * descriptor API. Note that it is paired with mmc_gpiod_free_ro() not + * mmc_gpio_free_ro(). + * + * Returns zero on success, else an error. 
+ */ +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce) +{ + struct mmc_gpio *ctx; + struct gpio_desc *desc; + int ret; + + ret = mmc_gpio_alloc(host); + if (ret < 0) + return ret; + + ctx = host->slot.handler_priv; + + if (!con_id) + con_id = ctx->ro_label; + + desc = devm_gpiod_get_index(host->parent, con_id, idx, GPIOD_IN); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + if (debounce) { + ret = gpiod_set_debounce(desc, debounce); + if (ret < 0) + return ret; + } + + ctx->override_ro_active_level = override_active_level; + ctx->ro_gpio = desc; + + return 0; +} +EXPORT_SYMBOL(mmc_gpiod_request_ro); + /** * mmc_gpiod_free_cd - free the card-detection gpio descriptor * @host: mmc host diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index d2433381e828..a0d0442c15bf 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -25,6 +25,9 @@ void mmc_gpio_free_cd(struct mmc_host *host); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce); +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce); void mmc_gpiod_free_cd(struct mmc_host *host); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -- cgit v1.2.3 From 3034a146820c26fe6da66a45f6340fe87fe0983a Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 27 Jun 2014 18:15:44 +0300 Subject: ima: pass 'opened' flag to identify newly created files Empty files and missing xattrs do not guarantee that a file was just created. This patch passes FILE_CREATED flag to IMA to reliably identify new files. 
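In sketch form, the reliable test is the FILE_CREATED bit rather than the file size (hypothetical helper, for illustration only):

static bool ima_file_is_new(int opened)
{
        /*
         * The old heuristic, inode->i_size == 0, also matches files that
         * existed before this open but happen to be empty.  FILE_CREATED
         * is set in 'opened' by the VFS open path only when this open
         * actually created the file, so it cannot misfire.
         */
        return opened & FILE_CREATED;
}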
Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Cc: 3.14+ --- fs/namei.c | 2 +- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima.h | 4 ++-- security/integrity/ima/ima_appraise.c | 4 ++-- security/integrity/ima/ima_main.c | 16 ++++++++-------- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 985c6f368485..005771f97189 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3058,7 +3058,7 @@ opened: error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(file, op->acc_mode); + error = ima_file_check(file, op->acc_mode, *opened); if (error) goto exit_fput; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 140c496f612c..d49c778faecb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -709,7 +709,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, host_err = PTR_ERR(*filp); *filp = NULL; } else { - host_err = ima_file_check(*filp, may_flags); + host_err = ima_file_check(*filp, may_flags, 0); if (may_flags & NFSD_MAY_64BIT_COOKIE) (*filp)->f_mode |= FMODE_64BITHASH; diff --git a/include/linux/ima.h b/include/linux/ima.h index 7cf5e9b32550..120ccc53fcb7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -15,7 +15,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); @@ -27,7 +27,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm) return 0; } -static inline int ima_file_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask, int opened) { return 0; } diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 57da4bd7ba0c..0fb456c20eda 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -177,7 +177,7 @@ void ima_delete_rules(void); int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len); + int xattr_len, int opened); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, @@ -193,7 +193,7 @@ static inline int ima_appraise_measurement(int func, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { return INTEGRITY_UNKNOWN; } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index a4605d677248..225fd944a4ef 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -183,7 +183,7 @@ int ima_read_xattr(struct dentry *dentry, int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { static const char op[] = "appraise_data"; char *cause = "unknown"; @@ -203,7 +203,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, cause = "missing-hash"; status = INTEGRITY_NOLABEL; - if (inode->i_size == 0) 
{ + if (opened & FILE_CREATED) { iint->flags |= IMA_NEW_FILE; status = INTEGRITY_PASS; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0a2298f90c9c..f82cf9b8e92b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -157,7 +157,7 @@ void ima_file_free(struct file *file) } static int process_measurement(struct file *file, const char *filename, - int mask, int function) + int mask, int function, int opened) { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; @@ -226,7 +226,7 @@ static int process_measurement(struct file *file, const char *filename, xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) rc = ima_appraise_measurement(_func, iint, file, pathname, - xattr_value, xattr_len); + xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -255,7 +255,7 @@ out: int ima_file_mmap(struct file *file, unsigned long prot) { if (file && (prot & PROT_EXEC)) - return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK, 0); return 0; } @@ -277,7 +277,7 @@ int ima_bprm_check(struct linux_binprm *bprm) return process_measurement(bprm->file, (strcmp(bprm->filename, bprm->interp) == 0) ? bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + MAY_EXEC, BPRM_CHECK, 0); } /** @@ -290,12 +290,12 @@ int ima_bprm_check(struct linux_binprm *bprm) * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ -int ima_file_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask, int opened) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + FILE_CHECK, opened); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -318,7 +318,7 @@ int ima_module_check(struct file *file) #endif return 0; /* We rely on module signature checking */ } - return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK, 0); } int ima_fw_from_file(struct file *file, char *buf, size_t size) @@ -329,7 +329,7 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size) return -EACCES; /* INTEGRITY_UNKNOWN */ return 0; } - return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK, 0); } static int __init init_ima(void) -- cgit v1.2.3 From ef979a26e3d521d51dbd9950e46a69e303073171 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 9 Sep 2014 08:56:48 +0800 Subject: usb: gadget: add reset API at usb_gadget_driver Adding a reset API for the UDC bus reset handler is useful for two reasons. First, the current disconnect API at usb_gadget_driver is also invoked by the UDC's bus reset handler, although the documentation says it is invoked when the host is disconnected. Second, we may expect the gadget driver to do different things when the host sends a bus reset and when the host disconnects the gadget; e.g., we may not want to flush dirty pages for mass storage at bus reset, but do want to at disconnection.
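With the new callback, a gadget driver can react differently to the two events, roughly like this (hypothetical driver sketch; only the relevant callbacks are shown):

static void my_gadget_reset(struct usb_gadget *gadget)
{
        /* Host will re-enumerate shortly: re-arm endpoints, keep state. */
}

static void my_gadget_disconnect(struct usb_gadget *gadget)
{
        /* Host is really gone: e.g. flush dirty pages for mass storage. */
}

static struct usb_gadget_driver my_gadget_driver = {
        /* .function, .bind, .setup, ... elided */
        .reset          = my_gadget_reset,
        .disconnect     = my_gadget_disconnect,
};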
Acked-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c540557b564b..598a6e9b2850 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -817,6 +817,8 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) * Called in a context that permits sleeping. * @suspend: Invoked on USB suspend. May be called in_interrupt. * @resume: Invoked on USB resume. May be called in_interrupt. + * @reset: Invoked on USB bus reset. It is mandatory for all gadget drivers + * and should be called in_interrupt. * @driver: Driver model state for this driver. * * Devices are disabled till a gadget driver successfully bind()s, which @@ -874,6 +876,7 @@ struct usb_gadget_driver { void (*disconnect)(struct usb_gadget *); void (*suspend)(struct usb_gadget *); void (*resume)(struct usb_gadget *); + void (*reset)(struct usb_gadget *); /* FIXME support safe rmmod */ struct device_driver driver; -- cgit v1.2.3 From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:17 -0700 Subject: net: filter: add "load 64-bit immediate" eBPF instruction add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register. All previous instructions were 8-byte. This is first 16-byte instruction. Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction: insn[0].code = BPF_LD | BPF_DW | BPF_IMM insn[0].dst_reg = destination register insn[0].imm = lower 32-bit insn[1].code = 0 insn[1].imm = upper 32-bit All unused fields must be zero. Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 32-bit immediate value into a register. x64 JITs it as single 'movabsq %rax, imm64' arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn Note that old eBPF programs are binary compatible with new interpreter. It helps eBPF programs load 64-bit constant into a register with one instruction instead of using two registers and 4 instructions: BPF_MOV32_IMM(R1, imm32) BPF_ALU64_IMM(BPF_LSH, R1, 32) BPF_MOV32_IMM(R2, imm32) BPF_ALU64_REG(BPF_OR, R1, R2) User space generated programs will use this instruction to load constants only. To tell kernel that user space needs a pointer the _pseudo_ variant of this instruction may be added later, which will use extra bits of encoding to indicate what type of pointer user space is asking kernel to provide. For example 'off' or 'src_reg' fields can be used for such purpose. src_reg = 1 could mean that user space is asking kernel to validate and load in-kernel map pointer. src_reg = 2 could mean that user space needs readonly data section pointer src_reg = 3 could mean that user space needs a pointer to per-cpu local data All such future pseudo instructions will not be carrying the actual pointer as part of the instruction, but rather will be treated as a request to kernel to provide one. The kernel will verify the request_for_a_pointer, then will drop _pseudo_ marking and will store actual internal pointer inside the instruction, so the end result is the interpreter and JITs never see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that loads 64-bit immediate into a register. User space never operates on direct pointers and verifier can easily recognize request_for_pointer vs other instructions. 
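In sketch form, loading a 64-bit constant with the BPF_LD_IMM64 macro added below occupies two struct bpf_insn slots but reads as one instruction (register and constant as used in the lib/test_bpf.c test case):

struct bpf_insn prog[] = {
        /* Slot 0 carries BPF_LD | BPF_DW | BPF_IMM, the destination
         * register and the low 32 bits of the immediate; slot 1 has
         * code == 0 (reserved opcode) and the high 32 bits in imm.
         */
        BPF_LD_IMM64(BPF_REG_1, 0x567800001234LL),
        BPF_EXIT_INSN(),
};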
Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 8 +++++++- arch/x86/net/bpf_jit_comp.c | 17 +++++++++++++++++ include/linux/filter.h | 18 ++++++++++++++++++ kernel/bpf/core.c | 5 +++++ lib/test_bpf.c | 21 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c48a9704bda8..81916ab5d96f 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -951,7 +951,7 @@ Size modifier is one of ... Mode modifier is one of: - BPF_IMM 0x00 /* classic BPF only, reserved in eBPF */ + BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ BPF_ABS 0x20 BPF_IND 0x40 BPF_MEM 0x60 @@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and 2 byte atomic increments are not supported. +eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists +of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single +instruction that loads 64-bit immediate value into a dst_reg. +Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads +32-bit immediate value into a register. + Testing ------- diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 39ccfbb4a723..06f8c17f5484 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; + case BPF_LD | BPF_IMM | BPF_DW: + if (insn[1].code != 0 || insn[1].src_reg != 0 || + insn[1].dst_reg != 0 || insn[1].off != 0) { + /* verifier must catch invalid insns */ + pr_err("invalid BPF_LD_IMM64 insn\n"); + return -EINVAL; + } + + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(insn[0].imm, 4); + EMIT(insn[1].imm, 4); + + insn++; + i++; + break; + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: diff --git a/include/linux/filter.h b/include/linux/filter.h index c78994593355..bf323da77950 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -166,6 +166,24 @@ enum { .off = 0, \ .imm = IMM }) +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b54bb2c2e494..2c2bfaacce66 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -242,6 +242,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; void *ptr; int off; @@ -301,6 +302,10 @@ select_insn: ALU64_MOV_K: DST = IMM; CONT; + 
LD_IMM_DW: + DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + insn++; + CONT; ALU64_ARSH_X: (*(s64 *) &DST) >>= SRC; CONT; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9a67456ba29a..413890815d3e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = { { }, { { 1, 0 } }, }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234L), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, }; static struct net_device dev; -- cgit v1.2.3 From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:18 -0700 Subject: net: filter: split filter.h and expose eBPF to user space allow user space to generate eBPF programs uapi/linux/bpf.h: eBPF instruction set definition linux/filter.h: the rest This patch only moves macro definitions, but practically it freezes existing eBPF instruction set, though new instructions can still be added in the future. These eBPF definitions cannot go into uapi/linux/filter.h, since the names may conflict with existing applications. Full eBPF ISA description is in Documentation/networking/filter.txt Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 56 +--------------------------------------- include/uapi/linux/Kbuild | 1 + include/uapi/linux/bpf.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 55 deletions(-) create mode 100644 include/uapi/linux/bpf.h (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bf323da77950..8f82ef3f1cdd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -10,58 +10,12 @@ #include #include #include +#include struct sk_buff; struct sock; struct seccomp_data; -/* Internally used and optimized filter representation with extended - * instruction set based on top of classic BPF. - */ - -/* instruction classes */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ - -/* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ -#define BPF_XADD 0xc0 /* exclusive add */ - -/* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ - -/* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ - -/* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, -}; - -/* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG __MAX_BPF_REG - /* ArgX, context and stack frame pointer register positions. 
Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. @@ -322,14 +276,6 @@ enum { #define SK_RUN_FILTER(filter, ctx) \ (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) -struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ -}; - #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 24e9033f8b3f..fb3f7b675229 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -67,6 +67,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += bpf.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h new file mode 100644 index 000000000000..479ed0b6be16 --- /dev/null +++ b/include/uapi/linux/bpf.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From dc8ecdd3a3fccf73fcb07711cde064ce5727f9d1 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:06 +0200 Subject: bcma: move bus struct setup into early part of host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change is important for SoC host. In future we will want to know chip ID (needed for early MIPS boot) before doing cores scanning. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/host_pci.c | 3 +++ drivers/bcma/host_soc.c | 3 +++ drivers/bcma/main.c | 2 -- drivers/bcma/scan.c | 7 ------- include/linux/bcma/bcma.h | 1 - 5 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index f032ed6dd459..1e5ac0a79696 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -208,6 +208,9 @@ static int bcma_host_pci_probe(struct pci_dev *dev, bus->boardinfo.vendor = bus->host_pci->subsystem_vendor; bus->boardinfo.type = bus->host_pci->subsystem_device; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_register(bus); if (err) diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 1edd7e064621..379e0d4ebe72 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -178,6 +178,9 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 0ff8d58831ef..9f6b0cb9a12c 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -334,8 +334,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus, struct bcma_device *core; struct bcma_device_id match; - bcma_init_bus(bus); - match.manuf = BCMA_MANUF_BCM; match.id = bcma_cc_core_id(bus); match.class = BCMA_CL_SIM; diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e9bd77249a4c..e2b990303042 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -438,9 +438,6 @@ void bcma_init_bus(struct bcma_bus *bus) s32 tmp; struct bcma_chipinfo *chipinfo = &(bus->chipinfo); - if (bus->init_done) - return; - INIT_LIST_HEAD(&bus->cores); bus->nr_cores = 0; @@ -452,8 +449,6 @@ void bcma_init_bus(struct bcma_bus *bus) chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT; bcma_info(bus, "Found chip with id 0x%04X, rev 0x%02X and package 0x%02X\n", chipinfo->id, chipinfo->rev, chipinfo->pkg); - - bus->init_done = true; } int bcma_bus_scan(struct bcma_bus *bus) @@ -463,8 +458,6 @@ int bcma_bus_scan(struct bcma_bus *bus) int err, core_num = 0; - bcma_init_bus(bus); - erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); if (bus->hosttype == BCMA_HOSTTYPE_SOC) { eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE); diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0272e49135d0..c1ba87d1548e 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,7 +332,6 @@ struct bcma_bus { struct bcma_device *mapped_core; struct list_head cores; u8 nr_cores; - u8 init_done:1; u8 num; struct bcma_drv_cc drv_cc; -- cgit v1.2.3 From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:07 +0200 Subject: bcma: use separated function to initialize bus on SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to split SoC bus init into two phases. The later one (which includes scanning) should be called when kalloc is available. Cc: Ralf Baechle Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- arch/mips/bcm47xx/setup.c | 4 ++++ drivers/bcma/host_soc.c | 11 +++++++++-- include/linux/bcma/bcma_soc.h | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 2b63e7e7d3d3..fff6ed4978c7 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -201,6 +201,10 @@ static void __init bcm47xx_register_bcma(void) pr_warn("bcm47xx: someone else already registered a bcma SPROM callback handler (err %d)\n", err); err = bcma_host_soc_register(&bcm47xx_bus.bcma); + if (err) + panic("Failed to register BCMA bus (err %d)", err); + + err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 379e0d4ebe72..718e054dd727 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -165,7 +165,6 @@ static const struct bcma_host_ops bcma_host_soc_ops = { int __init bcma_host_soc_register(struct bcma_soc *soc) { struct bcma_bus *bus = &soc->bus; - int err; /* iomap only first core. We have to read some register on this core * to scan the bus. @@ -181,7 +180,15 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Initialize struct, detect chip */ bcma_init_bus(bus); - /* Register */ + return 0; +} + +int __init bcma_host_soc_init(struct bcma_soc *soc) +{ + struct bcma_bus *bus = &soc->bus; + int err; + + /* Scan bus and initialize it */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) iounmap(bus->mmio); diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index 4203c5593b9f..f24d245f8394 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -10,6 +10,7 @@ struct bcma_soc { }; int __init bcma_host_soc_register(struct bcma_soc *soc); +int __init bcma_host_soc_init(struct bcma_soc *soc); int bcma_bus_register(struct bcma_bus *bus); -- cgit v1.2.3 From 23a2f39c8f4035eade7f226eb7ada30c78d9eee3 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:35 +0200 Subject: bcma: store more alternative addresses Each core can have more than one alternative address; there are cores with 8 alternative addresses for different functions. The PHY control in the ChipCommon B core is done through the second alternative address, not the first one. Signed-off-by: Hauke Mehrtens CC: linux-usb@vger.kernel.org Signed-off-by: John W.
Linville --- drivers/bcma/scan.c | 9 +++++---- drivers/usb/host/bcma-hcd.c | 2 +- include/linux/bcma/bcma.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e2b990303042..06be7282cbc4 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -276,7 +276,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, struct bcma_device *core) { u32 tmp; - u8 i, j; + u8 i, j, k; s32 cia, cib; u8 ports[2], wrappers[2]; @@ -367,6 +367,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, core->addr = tmp; /* get & parse slave ports */ + k = 0; for (i = 0; i < ports[1]; i++) { for (j = 0; ; j++) { tmp = bcma_erom_get_addr_desc(bus, eromptr, @@ -376,9 +377,9 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* pr_debug("erom: slave port %d " * "has %d descriptors\n", i, j); */ break; - } else { - if (i == 0 && j == 0) - core->addr1 = tmp; + } else if (k < ARRAY_SIZE(core->addr_s)) { + core->addr_s[k] = tmp; + k++; } } } diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 205f4a336583..cd6d0afb6b8f 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -237,7 +237,7 @@ static int bcma_hcd_probe(struct bcma_device *dev) bcma_hcd_init_chip(dev); /* In AI chips EHCI is addrspace 0, OHCI is 1 */ - ohci_addr = dev->addr1; + ohci_addr = dev->addr_s[0]; if ((chipinfo->id == 0x5357 || chipinfo->id == 0x4749) && chipinfo->rev == 0) ohci_addr = 0x18009000; diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index c1ba87d1548e..7fc16c991291 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -267,7 +267,7 @@ struct bcma_device { u8 core_unit; u32 addr; - u32 addr1; + u32 addr_s[8]; u32 wrap; void __iomem *io_addr; -- cgit v1.2.3 From 1716bcf3f76fe71e98d4851a3eb73ea3d93d4773 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:36 +0200 Subject: bcma: add support for chipcommon B core This core is used on BCM4708 to configure the PCIe and USB3 PHYs and it contains the addresses to the Device Management unit. This will be used by the PCIe driver first. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/Makefile | 1 + drivers/bcma/bcma_private.h | 4 ++ drivers/bcma/driver_chipcommon_b.c | 61 +++++++++++++++++++++++++++++ drivers/bcma/main.c | 10 +++++ drivers/bcma/scan.c | 1 + include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_driver_chipcommon.h | 8 ++++ 7 files changed, 86 insertions(+) create mode 100644 drivers/bcma/driver_chipcommon_b.c (limited to 'include/linux') diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index 91290f7f61b8..838b4b9d352f 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -1,5 +1,6 @@ bcma-y += main.o scan.o core.o sprom.o bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o +bcma-y += driver_chipcommon_b.o bcma-$(CONFIG_BCMA_SFLASH) += driver_chipcommon_sflash.o bcma-$(CONFIG_BCMA_NFLASH) += driver_chipcommon_nflash.o bcma-y += driver_pci.o diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 09b632ad0fe2..b40be43c6f31 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -50,6 +50,10 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc); extern struct platform_device bcma_pflash_dev; #endif /* CONFIG_BCMA_DRIVER_MIPS */ +/* driver_chipcommon_b.c */ +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb); + /* driver_chipcommon_pmu.c */ u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc); u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc); diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c new file mode 100644 index 000000000000..c20b5f4ff290 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_b.c @@ -0,0 +1,61 @@ +/* + * Broadcom specific AMBA + * ChipCommon B Unit driver + * + * Copyright 2014, Hauke Mehrtens + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include +#include + +static bool bcma_wait_reg(struct bcma_bus *bus, void __iomem *addr, u32 mask, + u32 value, int timeout) +{ + unsigned long deadline = jiffies + timeout; + u32 val; + + do { + val = readl(addr); + if ((val & mask) == value) + return true; + cpu_relax(); + udelay(10); + } while (!time_after_eq(jiffies, deadline)); + + bcma_err(bus, "Timeout waiting for register %p\n", addr); + + return false; +} + +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value) +{ + struct bcma_bus *bus = ccb->core->bus; + + writel(offset, ccb->mii + 0x00); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); + writel(value, ccb->mii + 0x04); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); +} +EXPORT_SYMBOL_GPL(bcma_chipco_b_mii_write); + +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb) +{ + if (ccb->setup_done) + return 0; + + ccb->setup_done = 1; + ccb->mii = ioremap_nocache(ccb->core->addr_s[1], BCMA_CORE_SIZE); + if (!ccb->mii) + return -ENOMEM; + + return 0; +} + +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb) +{ + if (ccb->mii) + iounmap(ccb->mii); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 297a2d46985a..c421403cab43 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -173,6 +173,7 @@ static int bcma_register_devices(struct bcma_bus *bus) switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: case BCMA_CORE_CHIPCOMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: case BCMA_CORE_PCI: case BCMA_CORE_PCIE: case BCMA_CORE_PCIE2: @@ -287,6 +288,13 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_chipcommon_init(&bus->drv_cc); } + /* Init CC core */ + core = bcma_find_core(bus, BCMA_CORE_NS_CHIPCOMMON_B); + if (core) { + bus->drv_cc_b.core = core; + bcma_core_chipcommon_b_init(&bus->drv_cc_b); + } + /* Init MIPS core */ core = bcma_find_core(bus, BCMA_CORE_MIPS_74K); if (core) { @@ -341,6 +349,8 @@ void bcma_bus_unregister(struct bcma_bus *bus) else if (err) bcma_err(bus, "Can not unregister GPIO driver: %i\n", err); + bcma_core_chipcommon_b_free(&bus->drv_cc_b); + cores[0] = bcma_find_core(bus, BCMA_CORE_MIPS_74K); cores[1] = bcma_find_core(bus, BCMA_CORE_PCIE); cores[2] = bcma_find_core(bus, BCMA_CORE_4706_MAC_GBIT_COMMON); diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index 06be7282cbc4..b3a403c136fb 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -314,6 +314,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* Some specific cores don't need wrappers */ switch (core->id.id) { case BCMA_CORE_4706_MAC_GBIT_COMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: /* Not used yet: case BCMA_CORE_OOB_ROUTER: */ break; default: diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 7fc16c991291..634597917670 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -335,6 +335,7 @@ struct bcma_bus { u8 num; struct bcma_drv_cc drv_cc; + struct bcma_drv_cc_b drv_cc_b; struct bcma_drv_pci drv_pci[2]; struct bcma_drv_pcie2 drv_pcie2; struct bcma_drv_mips drv_mips; diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 63d105cd14a3..db6fa217f98b 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -644,6 +644,12 @@ struct bcma_drv_cc { #endif }; +struct bcma_drv_cc_b { + struct bcma_device *core; + u8 setup_done:1; + void __iomem *mii; +}; + /* Register access */ #define bcma_cc_read32(cc, 
offset) \ bcma_read32((cc)->core, offset) @@ -699,4 +705,6 @@ extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid); extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc); +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value); + #endif /* LINUX_BCMA_DRIVER_CC_H_ */ -- cgit v1.2.3 From d0449b90f80f263e17e8b3ce31442e45121dc46c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Remove unused conf argument from lm_grant This argument is always NULL so don't pass it around. [jlayton: remove dependencies on previous patches in series] Signed-off-by: Joe Perches Signed-off-by: Jeff Layton --- fs/dlm/plock.c | 8 ++++---- fs/lockd/svclock.c | 12 +++--------- include/linux/fs.h | 2 +- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index f704458ea5f5..e0ab3a93eeff 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -30,7 +30,7 @@ struct plock_op { struct plock_xop { struct plock_op xop; - void *callback; + int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; @@ -190,7 +190,7 @@ static int dlm_plock_callback(struct plock_op *op) struct file *file; struct file_lock *fl; struct file_lock *flc; - int (*notify)(void *, void *, int) = NULL; + int (*notify)(struct file_lock *fl, int result) = NULL; struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; @@ -209,7 +209,7 @@ static int dlm_plock_callback(struct plock_op *op) notify = xop->callback; if (op->info.rv) { - notify(fl, NULL, op->info.rv); + notify(fl, op->info.rv); goto out; } @@ -228,7 +228,7 @@ static int dlm_plock_callback(struct plock_op *op) (unsigned long long)op->info.number, file, fl); } - rv = notify(fl, NULL, 0); + rv = notify(fl, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ log_print("dlm_plock_callback: lock granted after lock request " diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ab798a88ec1d..2a6170133c1d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -667,22 +667,16 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * deferred rpc for GETLK and SETLK. 
*/ static void -nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf, - int result) +nlmsvc_update_deferred_block(struct nlm_block *block, int result) { block->b_flags |= B_GOT_CALLBACK; if (result == 0) block->b_granted = 1; else block->b_flags |= B_TIMED_OUT; - if (conf) { - if (block->b_fl) - __locks_copy_lock(block->b_fl, conf); - } } -static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, - int result) +static int nlmsvc_grant_deferred(struct file_lock *fl, int result) { struct nlm_block *block; int rc = -ENOENT; @@ -697,7 +691,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, rc = -ENOLCK; break; } - nlmsvc_update_deferred_block(block, conf, result); + nlmsvc_update_deferred_block(block, result); } else if (result == 0) block->b_granted = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index 94187721ad41..908af4f81680 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -869,7 +869,7 @@ struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ - int (*lm_grant)(struct file_lock *, struct file_lock *, int); + int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock **, int); }; -- cgit v1.2.3 From 3fe0fff18fe87c6a2179837de68d1174903c6367 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Rename __locks_copy_lock() to locks_copy_conflock() Jeff advice, " Right now __locks_copy_lock is only used to copy conflocks. It would be good to rename that to something more distinct (i.e.locks_copy_conflock), to make it clear that we're generating a conflock there." v5: change order from 3/6 to 2/6 v4: new patch only renaming function name Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 10 +++++----- include/linux/fs.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index bb08857f90b5..ec9becd02d3d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -281,7 +281,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) /* * Initialize a new lock from an existing file_lock structure. 
*/ -void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) +void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; @@ -293,14 +293,14 @@ void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) new->fl_ops = NULL; new->fl_lmops = NULL; } -EXPORT_SYMBOL(__locks_copy_lock); +EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); - __locks_copy_lock(new, fl); + locks_copy_conflock(new, fl); new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; @@ -735,7 +735,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) break; } if (cfl) { - __locks_copy_lock(fl, cfl); + locks_copy_conflock(fl, cfl); if (cfl->fl_nspid) fl->fl_pid = pid_vnr(cfl->fl_nspid); } else @@ -941,7 +941,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (!posix_locks_conflict(request, fl)) continue; if (conflock) - __locks_copy_lock(conflock, fl); + locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 908af4f81680..5ab86f44b697 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -966,7 +966,7 @@ void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); -extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); +extern void locks_copy_conflock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); @@ -1026,7 +1026,7 @@ static inline void locks_init_lock(struct file_lock *fl) return; } -static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) +static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; } -- cgit v1.2.3 From 5c97d7b1479982a48cf2129062b880c2555049ac Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:43 -0400 Subject: locks: New ops in lock_manager_operations for get/put owner NFSD or another lock manager may take a reference on the lock owner, so add two new operations for copying and releasing the owner.
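A lock manager whose owner object is refcounted can then implement the pair roughly as follows (hypothetical sketch; struct my_owner, my_owner_release and my_lm_ops are made-up names):

struct my_owner {
        struct kref ref;
        /* ... lock-manager-specific owner state ... */
};

static void my_owner_release(struct kref *ref)
{
        kfree(container_of(ref, struct my_owner, ref));
}

static void my_lm_get_owner(struct file_lock *new, struct file_lock *fl)
{
        struct my_owner *owner = fl->fl_owner;

        kref_get(&owner->ref);          /* pin the owner for the copy */
        new->fl_owner = owner;
}

static void my_lm_put_owner(struct file_lock *fl)
{
        struct my_owner *owner = fl->fl_owner;

        fl->fl_owner = NULL;
        kref_put(&owner->ref, my_owner_release);
}

static const struct lock_manager_operations my_lm_ops = {
        .lm_get_owner   = my_lm_get_owner,
        .lm_put_owner   = my_lm_put_owner,
        /* .lm_notify, .lm_grant, ... as required */
};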
v5: change order from 2/6 to 3/6 v4: rename lm_copy_owner/lm_release_owner to lm_get_owner/lm_put_owner Reviewed-by: Jeff Layton Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 12 ++++++++++-- include/linux/fs.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index ec9becd02d3d..5e83f3a99377 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -230,8 +230,12 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - fl->fl_lmops = NULL; + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_put_owner) + fl->fl_lmops->lm_put_owner(fl); + fl->fl_lmops = NULL; + } } EXPORT_SYMBOL_GPL(locks_release_private); @@ -274,8 +278,12 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); new->fl_ops = fl->fl_ops; } - if (fl->fl_lmops) + + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_get_owner) + fl->fl_lmops->lm_get_owner(new, fl); new->fl_lmops = fl->fl_lmops; + } } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ab86f44b697..3b07ce2698de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -868,6 +868,8 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); + void (*lm_get_owner)(struct file_lock *, struct file_lock *); + void (*lm_put_owner)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); -- cgit v1.2.3 From 09802fd2a8caea2a2147fca8d7975697c5de573d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: lockd: rip out deferred lock handling from testlock codepath As Kinglong points out, the nlm_block->b_fl field is no longer used at all. Also, vfs_test_lock in the generic locking code will only return FILE_LOCK_DEFERRED if FL_SLEEP is set, and it isn't here. The only other place that returns that value is the DLM lock code, but it only does that in dlm_posix_lock, never in dlm_posix_get. Remove all of the deferred locking code from the testlock codepath since it doesn't appear to ever be used anyway. I do have a small concern that this might cause a behavior change in the case where you have a block already sitting on the list when the testlock request comes in, but that looks like it doesn't really work properly anyway. I think it's best to just pass that down to vfs_test_lock and let the filesystem report that instead of trying to infer what's going on with the lock by looking at an existing block. 
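Condensed, the testlock path after this patch becomes (sketch; the actual code assigns a __be32 status and jumps to a common exit label):

error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
        /* Deferred test requests are no longer handled here. */
        WARN_ON_ONCE(error == FILE_LOCK_DEFERRED);
        return nlm_lck_denied_nolocks;
}
if (lock->fl.fl_type == F_UNLCK)
        return nlm_granted;             /* no conflicting lock found */
/* otherwise copy the conflicting lock into 'conflock' and deny */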
Cc: cluster-devel@redhat.com Signed-off-by: Jeff Layton Reviewed-by: Kinglong Mee --- fs/lockd/svclock.c | 55 +++++---------------------------------------- include/linux/lockd/lockd.h | 1 - 2 files changed, 6 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index acfa94d5b489..13db95f54176 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -245,7 +245,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, block->b_daemon = rqstp->rq_server; block->b_host = host; block->b_file = file; - block->b_fl = NULL; file->f_count++; /* Add to file's list of blocks */ @@ -295,7 +294,6 @@ static void nlmsvc_free_block(struct kref *kref) nlmsvc_freegrantargs(block->b_call); nlmsvc_release_call(block->b_call); nlm_release_file(block->b_file); - kfree(block->b_fl); kfree(block); } @@ -508,7 +506,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, struct nlm_lock *conflock, struct nlm_cookie *cookie) { - struct nlm_block *block = NULL; int error; __be32 ret; @@ -519,63 +516,26 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock); - - if (block == NULL) { - struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - - if (conf == NULL) - return nlm_granted; - block = nlmsvc_create_block(rqstp, host, file, lock, cookie); - if (block == NULL) { - kfree(conf); - return nlm_granted; - } - block->b_fl = conf; - } - if (block->b_flags & B_QUEUED) { - dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n", - block, block->b_flags, block->b_fl); - if (block->b_flags & B_TIMED_OUT) { - nlmsvc_unlink_block(block); - ret = nlm_lck_denied; - goto out; - } - if (block->b_flags & B_GOT_CALLBACK) { - nlmsvc_unlink_block(block); - if (block->b_fl != NULL - && block->b_fl->fl_type != F_UNLCK) { - lock->fl = *block->b_fl; - goto conf_lock; - } else { - ret = nlm_granted; - goto out; - } - } - ret = nlm_drop_reply; - goto out; - } - if (locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; } + error = vfs_test_lock(file->f_file, &lock->fl); - if (error == FILE_LOCK_DEFERRED) { - ret = nlmsvc_defer_lock_rqst(rqstp, block); - goto out; - } if (error) { + /* We can't currently deal with deferred test requests */ + if (error == FILE_LOCK_DEFERRED) + WARN_ON_ONCE(1); + ret = nlm_lck_denied_nolocks; goto out; } + if (lock->fl.fl_type == F_UNLCK) { ret = nlm_granted; goto out; } -conf_lock: dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -589,8 +549,6 @@ conf_lock: locks_release_private(&lock->fl); ret = nlm_lck_denied; out: - if (block) - nlmsvc_release_block(block); return ret; } @@ -661,7 +619,6 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * This is a callback from the filesystem for VFS file lock requests. * It will be used if lm_grant is defined and the filesystem can not * respond to the request immediately. - * For GETLK request it will copy the reply to the nlm_block. * For SETLK or SETLKW request it will get the local posix lock. 
* In all cases it will move the block to the head of nlm_blocked q where * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 219d79627c05..ff82a32871b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -178,7 +178,6 @@ struct nlm_block { unsigned char b_granted; /* VFS granted lock */ struct nlm_file * b_file; /* file in question */ struct cache_req * b_cache_req; /* deferred request handling */ - struct file_lock * b_fl; /* set for GETLK */ struct cache_deferred_req * b_deferred_req; unsigned int b_flags; /* block flags */ #define B_QUEUED 1 /* lock queued */ -- cgit v1.2.3 From 699688a416524c3cea9eafaca69fc6c06c13c02e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: locks: remove lock_may_read and lock_may_write There are no callers of these functions. Signed-off-by: Jeff Layton --- fs/locks.c | 80 ------------------------------------------------------ include/linux/fs.h | 14 ---------- 2 files changed, 94 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index c3991dc80137..5200ffd2ba9b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2597,86 +2597,6 @@ static int __init proc_locks_init(void) module_init(proc_locks_init); #endif -/** - * lock_may_read - checks that the region is free of locks - * @inode: the inode that is being read - * @start: the first byte to read - * @len: the number of bytes to read - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a read and - * byte-range POSIX locks can prohibit a read if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_read(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if (fl->fl_type == F_RDLCK) - continue; - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_READ) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_read); - -/** - * lock_may_write - checks that the region is free of locks - * @inode: the inode that is being written - * @start: the first byte to write - * @len: the number of bytes to write - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a write and - * byte-range POSIX locks can prohibit a write if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. 
- */ -int lock_may_write(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_WRITE) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_write); - static int __init filelock_init(void) { int i; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3b07ce2698de..458f733c96bd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -985,8 +985,6 @@ extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **); extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); -extern int lock_may_read(struct inode *, loff_t start, unsigned long count); -extern int lock_may_write(struct inode *, loff_t start, unsigned long count); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1117,18 +1115,6 @@ static inline int lease_modify(struct file_lock **before, int arg) { return -EINVAL; } - -static inline int lock_may_read(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} - -static inline int lock_may_write(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3 From 1c994a0909a556508c2cc26ab5d9e13c5ce33aa0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Aug 2014 06:49:41 -0400 Subject: locks: consolidate "nolease" routines GFS2 and NFS have setlease routines that always just return -EINVAL. Turn that into a generic routine that can live in fs/libfs.c. Cc: Cc: Steven Whitehouse Cc: Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Reviewed-by: Christoph Hellwig --- fs/gfs2/file.c | 22 +--------------------- fs/libfs.c | 16 ++++++++++++++++ fs/nfs/file.c | 13 +------------ fs/nfs/internal.h | 1 - fs/nfs/nfs4file.c | 2 +- include/linux/fs.h | 1 + 6 files changed, 20 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 26b3f952e6b1..2c02478a86b0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -912,26 +912,6 @@ out_uninit: #ifdef CONFIG_GFS2_FS_LOCKING_DLM -/** - * gfs2_setlease - acquire/release a file lease - * @file: the file pointer - * @arg: lease type - * @fl: file lock - * - * We don't currently have a way to enforce a lease across the whole - * cluster; until we do, disable leases (by just returning -EINVAL), - * unless the administrator has requested purely local locking. 
- * - * Locking: called under i_lock - * - * Returns: errno - */ - -static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) -{ - return -EINVAL; -} - /** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer @@ -1069,7 +1049,7 @@ const struct file_operations gfs2_file_fops = { .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .setlease = gfs2_setlease, + .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/libfs.c b/fs/libfs.c index 88e3e00e2eca..29012a303ef8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1075,3 +1075,19 @@ struct inode *alloc_anon_inode(struct super_block *s) return inode; } EXPORT_SYMBOL(alloc_anon_inode); + +/** + * simple_nosetlease - generic helper for prohibiting leases + * @filp: file pointer + * @arg: type of lease to obtain + * @flp: new lease supplied for insertion + * + * Generic helper for filesystems that do not wish to allow leases to be set. + * All arguments are ignored and it just returns -EINVAL. + */ +int +simple_nosetlease(struct file *filp, long arg, struct file_lock **flp) +{ + return -EINVAL; +} +EXPORT_SYMBOL(simple_nosetlease); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 524dd80d1898..8c4048ecdad1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -891,17 +891,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) } EXPORT_SYMBOL_GPL(nfs_flock); -/* - * There is no protocol support for leases, so we have no way to implement - * them correctly in the face of opens by other clients. - */ -int nfs_setlease(struct file *file, long arg, struct file_lock **fl) -{ - dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(nfs_setlease); - const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = new_sync_read, @@ -918,6 +907,6 @@ const struct file_operations nfs_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9056622d2230..94d922ebb5ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -346,7 +346,6 @@ int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); -int nfs_setlease(struct file *, long, struct file_lock **); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index a816f0627a6c..3e987ad9ae25 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -131,5 +131,5 @@ const struct file_operations nfs4_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 458f733c96bd..435e3d9ec5cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2599,6 +2599,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +extern int simple_nosetlease(struct file *, long, struct file_lock **); extern const struct dentry_operations 
simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); -- cgit v1.2.3 From e0b93eddfe17dcb7d644eb5d6ad02a86fc41a977 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 11:27:32 -0400 Subject: security: make security_file_set_fowner, f_setown and __f_setown void return security_file_set_fowner always returns 0, so make it f_setown and __f_setown void return functions and fix up the error handling in the callers. Cc: linux-security-module@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig --- drivers/net/tun.c | 4 +--- drivers/tty/tty_io.c | 3 ++- fs/fcntl.c | 21 +++++++-------------- fs/locks.c | 2 +- fs/notify/dnotify/dnotify.c | 8 +------- include/linux/fs.h | 4 ++-- include/linux/security.h | 8 ++++---- net/socket.c | 3 ++- security/capability.c | 4 ++-- security/security.c | 4 ++-- security/selinux/hooks.c | 4 +--- security/smack/smack_lsm.c | 3 +-- 12 files changed, 26 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index acaaf6784179..186ce541c657 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2152,9 +2152,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on) goto out; if (on) { - ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); - if (ret) - goto out; + __f_setown(file, task_pid(current), PIDTYPE_PID, 0); tfile->flags |= TUN_FASYNC; } else tfile->flags &= ~TUN_FASYNC; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8fbad3410c75..aea3b66f7bf2 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2163,8 +2163,9 @@ static int __tty_fasync(int fd, struct file *filp, int on) } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl_lock, flags); - retval = __f_setown(filp, pid, type, 0); + __f_setown(filp, pid, type, 0); put_pid(pid); + retval = 0; } out: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index 22d1c3df61ac..99d440a4a6ba 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -98,26 +98,19 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, write_unlock_irq(&filp->f_owner.lock); } -int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, +void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - int err; - - err = security_file_set_fowner(filp); - if (err) - return err; - + security_file_set_fowner(filp); f_modown(filp, pid, type, force); - return 0; } EXPORT_SYMBOL(__f_setown); -int f_setown(struct file *filp, unsigned long arg, int force) +void f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; struct pid *pid; int who = arg; - int result; type = PIDTYPE_PID; if (who < 0) { type = PIDTYPE_PGID; @@ -125,9 +118,8 @@ int f_setown(struct file *filp, unsigned long arg, int force) } rcu_read_lock(); pid = find_vpid(who); - result = __f_setown(filp, pid, type, force); + __f_setown(filp, pid, type, force); rcu_read_unlock(); - return result; } EXPORT_SYMBOL(f_setown); @@ -181,7 +173,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) if (owner.pid && !pid) ret = -ESRCH; else - ret = __f_setown(filp, pid, type, 1); + __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; @@ -302,7 +294,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, force_successful_syscall_return(); break; case F_SETOWN: - err = f_setown(filp, arg, 1); + f_setown(filp, arg, 1); + err = 0; break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); diff --git a/fs/locks.c 
b/fs/locks.c index 5200ffd2ba9b..f5f648e003dd 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1776,7 +1776,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) new = NULL; - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); out_unlock: spin_unlock(&inode->i_lock); if (fl) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index abc8cbcfe90e..caaaf9dfe353 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -346,13 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) goto out; } - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - if (error) { - /* if we added, we must shoot */ - if (dn_mark == new_dn_mark) - destroy = 1; - goto out; - } + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); error = attach_dn(dn, dn_mark, id, fd, filp, mask); /* !error means that we attached the dn to the dn_mark, so don't free it */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 435e3d9ec5cf..96528f73dda4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1139,8 +1139,8 @@ extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); +extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); +extern void f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..b10e7af95d3b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1559,7 +1559,7 @@ struct security_operations { int (*file_lock) (struct file *file, unsigned int cmd); int (*file_fcntl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_set_fowner) (struct file *file); + void (*file_set_fowner) (struct file *file); int (*file_send_sigiotask) (struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive) (struct file *file); @@ -1834,7 +1834,7 @@ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_set_fowner(struct file *file); +void security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); @@ -2312,9 +2312,9 @@ static inline int security_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_set_fowner(struct file *file) +static inline void security_file_set_fowner(struct file *file) { - return 0; + return; } static inline int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/net/socket.c b/net/socket.c index 95ee7d8682e7..769c9671847e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1069,7 +1069,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - err = f_setown(sock->file, pid, 1); + 
f_setown(sock->file, pid, 1); + err = 0; break; case FIOGETOWN: case SIOCGPGRP: diff --git a/security/capability.c b/security/capability.c index a74fde6a7468..d68c57a62bcf 100644 --- a/security/capability.c +++ b/security/capability.c @@ -343,9 +343,9 @@ static int cap_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static int cap_file_set_fowner(struct file *file) +static void cap_file_set_fowner(struct file *file) { - return 0; + return; } static int cap_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/security.c b/security/security.c index e41b1a8d7644..18b35c63fc0c 100644 --- a/security/security.c +++ b/security/security.c @@ -775,9 +775,9 @@ int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_fcntl(file, cmd, arg); } -int security_file_set_fowner(struct file *file) +void security_file_set_fowner(struct file *file) { - return security_ops->file_set_fowner(file); + security_ops->file_set_fowner(file); } int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b0e940497e23..ada0d0bf3463 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3346,14 +3346,12 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, return err; } -static int selinux_file_set_fowner(struct file *file) +static void selinux_file_set_fowner(struct file *file) { struct file_security_struct *fsec; fsec = file->f_security; fsec->fown_sid = current_sid(); - - return 0; } static int selinux_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e6ab307ce86e..69e5635d89e5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1390,12 +1390,11 @@ static int smack_mmap_file(struct file *file, * Returns 0 * Further research may be required on this one. */ -static int smack_file_set_fowner(struct file *file) +static void smack_file_set_fowner(struct file *file) { struct smack_known *skp = smk_of_current(); file->f_security = skp->smk_known; - return 0; } /** -- cgit v1.2.3 From 2ae4c673e3cbd69bc2decf6d7f5961f3c7b9b38b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Sep 2014 15:43:52 -0700 Subject: f2fs: retain inconsistency information to initiate fsck.f2fs This patch adds sbi->need_fsck to conduct fsck.f2fs later. This flag can only be removed by fsck.f2fs. 
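As a sketch of the intended flow (illustrative only; the helper name f2fs_mark_need_fsck is hypothetical and not part of this patch), a consistency-check site would simply latch the flag and let the next checkpoint persist it:

    /* Hypothetical helper: latch the flag; do_checkpoint() then
     * persists it as CP_FSCK_FLAG in the checkpoint pack. */
    static inline void f2fs_mark_need_fsck(struct f2fs_sb_info *sbi)
    {
    	sbi->need_fsck = true;
    }

Once CP_FSCK_FLAG is on disk, fsck.f2fs can see that the image needs repair, and only fsck.f2fs clears the flag again.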
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + include/linux/f2fs_fs.h | 1 + 4 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cb5cb4ca1814..5af97d99106e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -882,6 +882,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + if (sbi->need_fsck) + set_ckpt_flags(ckpt, CP_FSCK_FLAG); + /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 222ff5b4d1e1..210c62df08c3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -434,6 +434,7 @@ struct f2fs_sb_info { struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ + bool need_fsck; /* need fsck.f2fs to fix */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41bdf511003d..a6923041f41a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -849,6 +849,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; + sbi->need_fsck = false; } /* diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 08ed2b0a96e6..9ca1ff3d4662 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -85,6 +85,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 #define CP_ORPHAN_PRESENT_FLAG 0x00000002 -- cgit v1.2.3 From 87354059881ce9315181604dc17076c535f4d744 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Jul 2014 20:41:42 -0400 Subject: ftrace: Add helper function ftrace_ops_get_func() Add a helper function that returns what the mcount trampoline is to call for a given ftrace_ops. This helper will be used by arch code in the future to set up dynamic trampolines. But as this does the same tests that are performed in choosing what function to call for the default mcount trampoline, we might as well use it to clean up the existing code. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 47 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f0b0edbf55a9..ef37286547fc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,8 @@ struct ftrace_ops; typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); + /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 17b606362ab4..dabf734f909c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -259,20 +259,12 @@ static void update_ftrace_function(void) * then have the mcount trampoline call the function directly.
*/ if (ftrace_ops_list == &ftrace_list_end || - (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && - !FTRACE_FORCE_LIST_FUNC)) { + (ftrace_ops_list->next == &ftrace_list_end)) { + /* Set the ftrace_ops that the arch callback uses */ set_function_trace_op = ftrace_ops_list; - /* - * If the func handles its own recursion, call it directly. - * Otherwise call the recursion protected function that - * will call the ftrace ops function. - */ - if (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) - func = ftrace_ops_list->func; - else - func = ftrace_ops_recurs_func; + + func = ftrace_ops_get_func(ftrace_ops_list); } else { /* Just use the default ftrace_ops */ set_function_trace_op = &ftrace_list_end; @@ -4856,6 +4848,37 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, trace_clear_recursion(bit); } +/** + * ftrace_ops_get_func - get the function a trampoline should call + * @ops: the ops to get the function for + * + * Normally the mcount trampoline will call the ops->func, but there + * are times that it should not. For example, if the ops does not + * have its own recursion protection, then it should call the + * ftrace_ops_recurs_func() instead. + * + * Returns the function that the trampoline should call for @ops. + */ +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + return ftrace_ops_list_func; + + /* + * If the func handles its own recursion, call it directly. + * Otherwise call the recursion protected function that + * will call the ftrace ops function. + */ + if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE)) + return ftrace_ops_recurs_func; + + return ops->func; +} + static void clear_ftrace_swapper(void) { struct task_struct *p; -- cgit v1.2.3 From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:47 +0200 Subject: net: bpf: consolidate JIT binary allocator Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") and later on replicated in aa2d2c73c21f ("s390/bpf,jit: address randomize and write protect jit code") for s390 architecture, write protection for BPF JIT images was added, along with a random start address for the JIT code so that it does not sit on a page boundary anymore. Since both use a very similar allocator for the BPF binary header, we can consolidate this code into the BPF core as it's mostly JIT independent anyway. This will also allow future archs that support DEBUG_SET_MODULE_RONX to simply reuse it instead of reimplementing it. JIT tested on x86_64 and s390x with BPF test suite. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: David S.
Miller --- arch/s390/net/bpf_jit_comp.c | 45 ++++++++------------------------------- arch/x86/net/bpf_jit_comp.c | 50 ++++++++++---------------------------------- include/linux/filter.h | 13 ++++++++++++ kernel/bpf/core.c | 39 ++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f2833c5b218a..b734f975c22e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -5,11 +5,9 @@ * * Author(s): Martin Schwidefsky */ -#include #include #include #include -#include #include #include #include @@ -148,6 +146,12 @@ struct bpf_jit { ret; \ }) +static void bpf_jit_fill_hole(void *area, unsigned int size) +{ + /* Fill whole space with illegal instructions */ + memset(area, 0, size); +} + static void bpf_jit_prologue(struct bpf_jit *jit) { /* Save registers and create stack frame if necessary */ @@ -780,38 +784,6 @@ out: return -1; } -/* - * Note: for security reasons, bpf code will follow a randomly - * sized amount of illegal instructions. - */ -struct bpf_binary_header { - unsigned int pages; - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, - u8 **image_ptr) -{ - struct bpf_binary_header *header; - unsigned int sz, hole; - - /* Most BPF filters are really small, but if some of them fill a page, - * allow at least 128 extra bytes for illegal instructions. - */ - sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - memset(header, 0, sz); - header->pages = sz / PAGE_SIZE; - hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - /* Insert random number of illegal instructions before BPF code - * and make sure the first instruction starts at an even address. - */ - *image_ptr = &header->image[(prandom_u32() % hole) & -2]; - return header; -} - void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; @@ -850,7 +822,8 @@ void bpf_jit_compile(struct bpf_prog *fp) size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - header = bpf_alloc_binary(size, &jit.start); + header = bpf_jit_binary_alloc(size, &jit.start, + 2, bpf_jit_fill_hole); if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; @@ -884,7 +857,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06f8c17f5484..9de0b5476b0c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -8,12 +8,10 @@ * as published by the Free Software Foundation; version 2 * of the License. */ -#include -#include #include #include #include -#include +#include int bpf_jit_enable __read_mostly; @@ -109,39 +107,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative_offset : func) : func##_positive_offset) -struct bpf_binary_header { - unsigned int pages; - /* Note : for security reasons, bpf code will follow a randomly - * sized amount of int3 instructions - */ - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, - u8 **image_ptr) -{ - unsigned int sz, hole; - struct bpf_binary_header *header; - - /* Most of BPF filters are really small, - * but if some of them fill a page, allow at least - * 128 extra bytes to insert a random section of int3 - */ - sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - - memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ - - header->pages = sz / PAGE_SIZE; - hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - - /* insert a random number of int3 instructions before BPF code */ - *image_ptr = &header->image[prandom_u32() % hole]; - return header; -} - /* pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_REG + 1) @@ -206,6 +171,12 @@ static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +static void jit_fill_hole(void *area, unsigned int size) +{ + /* fill whole space with int3 instructions */ + memset(area, 0xcc, size); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ bool seen_ld_abs; @@ -959,7 +930,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (proglen <= 0) { image = NULL; if (header) - module_free(NULL, header); + bpf_jit_binary_free(header); goto out; } if (image) { @@ -969,7 +940,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) break; } if (proglen == oldproglen) { - header = bpf_alloc_binary(proglen, &image); + header = bpf_jit_binary_alloc(proglen, &image, + 1, jit_fill_hole); if (!header) goto out; } @@ -998,7 +970,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/include/linux/filter.h b/include/linux/filter.h index 8f82ef3f1cdd..868764fcffb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -289,6 +289,11 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + struct bpf_work_struct { struct bpf_prog *prog; struct work_struct work; @@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2c2bfaacce66..8ee520f0ec70 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,9 +20,12 @@ * Andi Kleen - Fix a few bad bugs and races. 
* Kris Katterjohn - Added many additional checks in bpf_check_classic() */ + #include #include #include +#include +#include #include /* Registers */ @@ -125,6 +128,42 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns) +{ + struct bpf_binary_header *hdr; + unsigned int size, hole, start; + + /* Most of BPF filters are really small, but if some of them + * fill a page, allow at least 128 extra bytes to insert a + * random section of illegal instructions. + */ + size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + hdr = module_alloc(size); + if (hdr == NULL) + return NULL; + + /* Fill space with illegal/arch-dep instructions. */ + bpf_fill_ill_insns(hdr, size); + + hdr->pages = size / PAGE_SIZE; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), + PAGE_SIZE - sizeof(*hdr)); + start = (prandom_u32() % hole) & ~(alignment - 1); + + /* Leave a random number of instructions before BPF code. */ + *image_ptr = &hdr->image[start]; + + return hdr; +} + +void bpf_jit_binary_free(struct bpf_binary_header *hdr) +{ + module_free(NULL, hdr); +} + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. -- cgit v1.2.3 From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:49 +0200 Subject: net: bpf: be friendly to kmemcheck Reported by Mikulas Patocka, kmemcheck currently barks out a false positive since we don't have special kmemcheck annotation for bitfields used in bpf_prog structure. We currently have jited:1, len:31 and thus when accessing len while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that we're reading uninitialized memory. As we don't need the whole bit universe for pages member, we can just split it to u16 and use a bool flag for jited instead of a bitfield. Signed-off-by: Mikulas Patocka Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/arm/net/bpf_jit_32.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 6 +++--- net/core/filter.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2d1a5b93d91c..6b45f649eff0 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -933,7 +933,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); return; diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index cfa83cf2447d..0e97ccd29fe3 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1417,7 +1417,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 40c53ff59124..cbae2dfd053c 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -686,7 +686,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b734f975c22e..555f5c7e83ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -842,7 +842,7 @@ void bpf_jit_compile(struct bpf_prog *fp) if (jit.start) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f7a736b645e8..b2ad9dc5425e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -801,7 +801,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9de0b5476b0c..d56cd1f515bd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -955,7 +955,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = 1; + prog->jited = true; } out: kfree(addrs); diff --git a/include/linux/filter.h b/include/linux/filter.h index 868764fcffb8..4b59edead908 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -300,9 +300,9 @@ struct bpf_work_struct { }; struct bpf_prog { - u32 pages; /* Number of allocated pages */ - u32 jited:1, /* Is our filter JIT'ed? */ - len:31; /* Number of filter blocks */ + u16 pages; /* Number of allocated pages */ + bool jited; /* Is our filter JIT'ed? 
*/ + u32 len; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index fa5b7d0f77ac..dfc716ffa44b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -972,7 +972,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) int err; fp->bpf_func = NULL; - fp->jited = 0; + fp->jited = false; err = bpf_check_classic(fp->insns, fp->len); if (err) { -- cgit v1.2.3 From a0c7b164ad115ec0556dc0904ee2218cbc5cedfa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Sep 2014 23:13:57 +0100 Subject: regulator: of: Provide simplified DT parsing method Currently, regulator drivers which support DT all repeat very similar code: they supply a list of known regulator identifiers to be matched with DT and convert that to platform data, which is then matched up with the regulators as they are registered. This is both fiddly to get right and, for devices which can use the standard helpers to provide their operations, the main source of code in the driver. Since this code is essentially identical for most drivers we can factor it out into the core, moving the identifiers in the match table into the regulator descriptors and also allowing drivers to pass in the name of the subnode to search. When a driver provides an of_match string for the regulator the core will attempt to use that to obtain init_data, allowing the driver to remove all explicit code for DT parsing and simply provide data instead. The current code leaks the phandles for the child nodes; this will be addressed incrementally, and it makes no practical difference for FDT anyway as the DT data structures are never freed.
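To illustrate the simplified method (a hedged sketch; the driver, ops and node names below are hypothetical, not taken from this patch), a driver now only has to describe where its regulators live in DT:

    /* Sketch: a descriptor carrying the new DT lookup hints */
    static const struct regulator_desc example_ldo_desc = {
    	.name            = "example-ldo1",
    	.of_match        = "ldo1",        /* matched against the child node name
    	                                     or its "regulator-compatible" property */
    	.regulators_node = "regulators",  /* subnode of the device node to search */
    	.id              = 0,
    	.type            = REGULATOR_VOLTAGE,
    	.owner           = THIS_MODULE,
    	.ops             = &example_ldo_ops,  /* hypothetical helper-based ops */
    };

with a matching DT fragment:

    pmic {
    	regulators {
    		ldo1 {
    			regulator-min-microvolt = <1800000>;
    			regulator-max-microvolt = <3300000>;
    		};
    	};
    };

regulator_register() can then use regulator_of_get_init_data() to find the "ldo1" child of the "regulators" subnode and parse its constraints into init_data, so the driver itself carries no DT parsing code.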
Signed-off-by: Mark Brown --- drivers/regulator/core.c | 10 +++++--- drivers/regulator/internal.h | 4 ++++ drivers/regulator/of_regulator.c | 51 ++++++++++++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 4 ++++ 4 files changed, 66 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..496002efd961 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3516,12 +3516,17 @@ regulator_register(const struct regulator_desc *regulator_desc, return ERR_PTR(-EINVAL); } - init_data = config->init_data; - rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL); if (rdev == NULL) return ERR_PTR(-ENOMEM); + init_data = regulator_of_get_init_data(dev, regulator_desc, + &rdev->dev.of_node); + if (!init_data) { + init_data = config->init_data; + rdev->dev.of_node = of_node_get(config->of_node); + } + mutex_lock(®ulator_list_mutex); mutex_init(&rdev->mutex); @@ -3548,7 +3553,6 @@ regulator_register(const struct regulator_desc *regulator_desc, /* register with sysfs */ rdev->dev.class = ®ulator_class; - rdev->dev.of_node = of_node_get(config->of_node); rdev->dev.parent = dev; dev_set_name(&rdev->dev, "regulator.%d", atomic_inc_return(®ulator_no) - 1); diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 84bbda10c396..a6043ad32ead 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -35,4 +35,8 @@ struct regulator { struct dentry *debugfs; }; +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node); + #endif diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index ee5e67bc8d5b..7a51814abdc5 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -14,8 +14,11 @@ #include #include #include +#include #include +#include "internal.h" + static void of_get_regulation_constraints(struct device_node *np, struct regulator_init_data **init_data) { @@ -189,3 +192,51 @@ int of_regulator_match(struct device *dev, struct device_node *node, return count; } EXPORT_SYMBOL_GPL(of_regulator_match); + +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node) +{ + struct device_node *search, *child; + struct regulator_init_data *init_data = NULL; + const char *name; + + if (!dev->of_node || !desc->of_match) + return NULL; + + if (desc->regulators_node) + search = of_get_child_by_name(dev->of_node, + desc->regulators_node); + else + search = dev->of_node; + + if (!search) { + dev_err(dev, "Failed to find regulator container node\n"); + return NULL; + } + + for_each_child_of_node(search, child) { + name = of_get_property(child, "regulator-compatible", NULL); + if (!name) + name = child->name; + + if (strcmp(desc->of_match, name)) + continue; + + init_data = of_get_regulator_init_data(dev, child); + if (!init_data) { + dev_err(dev, + "failed to parse DT for regulator %s\n", + child->name); + break; + } + + of_node_get(child); + *node = child; + break; + } + + of_node_put(search); + + return init_data; +} diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..c35f5f97a147 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -203,6 +203,8 @@ enum regulator_type { * * @name: Identifying name for the regulator. 
* @supply_name: Identifying the regulator supply + * @of_match: Name used to identify regulator in DT. + * @regulators_node: Name of node containing regulator definitions in DT. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -242,6 +244,8 @@ enum regulator_type { struct regulator_desc { const char *name; const char *supply_name; + const char *of_match; + const char *regulators_node; int id; bool continuous_voltage_range; unsigned n_voltages; -- cgit v1.2.3 From e1effa0144a1ddf5b456c388ffaf784f3c5163fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 Aug 2014 17:19:38 -0400 Subject: ftrace: Annotate the ops operation on update Add three new flags for ftrace_ops: FTRACE_OPS_FL_ADDING FTRACE_OPS_FL_REMOVING FTRACE_OPS_FL_MODIFYING These will be set for the ftrace_ops when they are first added to the function tracing, being removed from function tracing or just having their functions changed from function tracing, respectively. This will be needed to remove the tramp_hash, which can grow quite big. The tramp_hash is used to note what functions a ftrace_ops is using a trampoline for. Denoting which ftrace_ops is being modified, will allow us to use the ftrace_ops hashes themselves, which are much smaller as they have a global flag to denote if a ftrace_ops is tracing all functions, as well as a notrace hash if the ftrace_ops is tracing all but a few. The tramp_hash just creates a hash item for every function, which can go into the 10s of thousands if all functions are using the ftrace_ops trampoline. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef37286547fc..d9216f6385d9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -91,6 +91,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) * DELETED - The ops are being deleted, do not let them be registered again. + * ADDING - The ops is in the process of being added. + * REMOVING - The ops is in the process of being removed. + * MODIFYING - The ops is in the process of changing its filter functions. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -102,6 +105,9 @@ enum { FTRACE_OPS_FL_STUB = 1 << 6, FTRACE_OPS_FL_INITIALIZED = 1 << 7, FTRACE_OPS_FL_DELETED = 1 << 8, + FTRACE_OPS_FL_ADDING = 1 << 9, + FTRACE_OPS_FL_REMOVING = 1 << 10, + FTRACE_OPS_FL_MODIFYING = 1 << 11, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 858ac16f8492..e43c793093e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1057,6 +1057,12 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; static struct ftrace_ops *removed_ops; +/* + * Set when doing a global update, like enabling all recs or disabling them. + * It is not set when just updating a single ftrace_ops. 
+ */ +static bool update_all_ops; + #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif @@ -2366,6 +2372,13 @@ static void ftrace_run_update_code(int command) FTRACE_WARN_ON(ret); } +static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) +{ + ops->flags |= FTRACE_OPS_FL_MODIFYING; + ftrace_run_update_code(command); + ops->flags &= ~FTRACE_OPS_FL_MODIFYING; +} + static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; @@ -2387,6 +2400,13 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } +static void ftrace_startup_all(int command) +{ + update_all_ops = true; + ftrace_startup_enable(command); + update_all_ops = false; +} + static int ftrace_startup(struct ftrace_ops *ops, int command) { int ret; @@ -2401,12 +2421,22 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; - ops->flags |= FTRACE_OPS_FL_ENABLED; + /* + * Note that ftrace probes uses this to start up + * and modify functions it will probe. But we still + * set the ADDING flag for modification, as probes + * do not have trampolines. If they add them in the + * future, then the probes will need to distinguish + * between adding and updating probes. + */ + ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); + ops->flags &= ~FTRACE_OPS_FL_ADDING; + return 0; } @@ -2456,11 +2486,12 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * If the ops uses a trampoline, then it needs to be * tested first on update. */ + ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; ftrace_run_update_code(command); - removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* * Dynamic ops may be freed, we must make sure that all @@ -3373,7 +3404,7 @@ static void __enable_ftrace_function_probe(void) if (ftrace_probe_registered) { /* still need to update the function call sites */ if (ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); return; } @@ -3792,7 +3823,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) static void ftrace_ops_update_code(struct ftrace_ops *ops) { if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); } static int @@ -4717,6 +4748,7 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } +static inline void ftrace_startup_all(int command) { } /* Keep as macros so we do not need to define the commands */ # define ftrace_startup(ops, command) \ ({ \ @@ -5016,7 +5048,8 @@ static int ftrace_pid_add(int p) set_ftrace_pid_task(pid); ftrace_update_pid_func(); - ftrace_startup_enable(0); + + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); return 0; @@ -5045,7 +5078,7 @@ static void ftrace_pid_reset(void) } ftrace_update_pid_func(); - ftrace_startup_enable(0); + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); } -- cgit v1.2.3 From fef5aeeee9e3717e7aea991a7ae9ff6a7a2d4c85 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Jul 2014 12:25:47 -0400 Subject: ftrace: Replace tramp_hash with old_*_hash to save space Allowing function callbacks to declare their own trampolines requires that each ftrace_ops that has a trampoline must have some 
sort of accounting that keeps track of which ops has a trampoline attached to a record. The easy way to solve this was to add a "tramp_hash" that created a hash entry for every function that an ops uses with a trampoline. But since we can have literally tens of thousands of functions being traced, that means we need tens of thousands of descriptors to map the ops to the function in the hash. This is quite expensive and can cause enabling and disabling the function graph tracer to take some time to start and stop. It can take up to several seconds to disable or enable all functions in the function graph tracer for this reason. The better approach, albeit more complex, is to keep track of how ops are being enabled and disabled, and use that along with the counting of the number of ops attached to records, to determine which ops has a trampoline attached to a record at enabling and disabling of tracing. To do this, the tramp_hash has been replaced with an old_filter_hash and old_notrace_hash, which get a copy of the ops filter_hash and notrace_hash respectively. The old hashes are kept until the ops has been modified or removed, and the old hashes are used with the logic of the accounting to determine the ops that have the trampoline of a record. The reason this has less of a footprint is due to the trick that an "empty" hash in the filter_hash means "all functions" and an empty hash in the notrace hash means "no functions" in the hash. This is much more efficient, doesn't have the delay, and takes up much less memory, as we do not need to map all the functions but just figure out which functions are mapped at the time it is enabled or disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 239 +++++++++++++++++-------------------------------- 2 files changed, 85 insertions(+), 156 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d9216f6385d9..662697babd48 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -140,7 +140,7 @@ struct ftrace_ops { int nr_trampolines; struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; - struct ftrace_hash *tramp_hash; + struct ftrace_ops_hash old_hash; unsigned long trampoline; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e43c793093e5..d325a1e76554 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1373,6 +1373,21 @@ update: return 0; } +static bool hash_contains_ip(unsigned long ip, + struct ftrace_ops_hash *hash) +{ + /* + * The function record is a match if it exists in the filter + * hash and not in the notrace hash. Note, an emty hash is + * considered a match for the filter hash, but an empty + * notrace hash is considered not in the notrace hash. + */ + return (ftrace_hash_empty(hash->filter_hash) || + ftrace_lookup_ip(hash->filter_hash, ip)) && + (ftrace_hash_empty(hash->notrace_hash) || + !ftrace_lookup_ip(hash->notrace_hash, ip)); +} + /* * Test the hashes for this ops to see if we want to call * the ops->func or not.
@@ -1388,8 +1403,7 @@ update: static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_ops_hash hash; int ret; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS @@ -1402,13 +1416,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); - notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); + hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); + hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); - if ((ftrace_hash_empty(filter_hash) || - ftrace_lookup_ip(filter_hash, ip)) && - (ftrace_hash_empty(notrace_hash) || - !ftrace_lookup_ip(notrace_hash, ip))) + if (hash_contains_ip(ip, &hash)) ret = 1; else ret = 0; @@ -1520,46 +1531,6 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } -static void ftrace_remove_tramp(struct ftrace_ops *ops, - struct dyn_ftrace *rec) -{ - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - rec->flags &= ~FTRACE_FL_TRAMP; - - if ((!ftrace_hash_empty(ops->func_hash->filter_hash) && - !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) || - ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) - return; - /* - * The tramp_hash entry will be removed at time - * of update. - */ - ops->nr_trampolines--; -} - -static void ftrace_clear_tramps(struct dyn_ftrace *rec, struct ftrace_ops *ops) -{ - struct ftrace_ops *op; - - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - do_for_each_ftrace_op(op, ftrace_ops_list) { - /* - * This function is called to clear other tramps - * not the one that is being updated. - */ - if (op == ops) - continue; - if (op->nr_trampolines) - ftrace_remove_tramp(op, rec); - } while_for_each_ftrace_op(op); -} - static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1648,18 +1619,16 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * function, and the ops has a trampoline registered * for it, then we can call it directly. */ - if (ftrace_rec_count(rec) == 1 && ops->trampoline) { + if (ftrace_rec_count(rec) == 1 && ops->trampoline) rec->flags |= FTRACE_FL_TRAMP; - ops->nr_trampolines++; - } else { + else /* * If we are adding another function callback * to this function, and the previous had a * custom trampoline in use, then we need to go * back to the default trampoline. */ - ftrace_clear_tramps(rec, ops); - } + rec->flags &= ~FTRACE_FL_TRAMP; /* * If any ops wants regs saved for this function @@ -1672,9 +1641,6 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, return; rec->flags--; - if (ops->trampoline && !ftrace_rec_count(rec)) - ftrace_remove_tramp(ops, rec); - /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is @@ -1688,6 +1654,17 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags &= ~FTRACE_FL_REGS; } + /* + * If the rec had TRAMP enabled, then it needs to + * be cleared. As TRAMP can only be enabled iff + * there is only a single ops attached to it. + * In otherwords, always disable it on decrementing. + * In the future, we may set it if rec count is + * decremented to one, and the ops that is left + * has a trampoline. 
+ */ + rec->flags &= ~FTRACE_FL_TRAMP; + /* * flags will be cleared in ftrace_check_record() * if rec count is zero. @@ -1910,15 +1887,14 @@ static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { if (!op->trampoline) continue; - if (ftrace_lookup_ip(op->func_hash->filter_hash, rec->ip) && - (ftrace_hash_empty(op->func_hash->notrace_hash) || - !ftrace_lookup_ip(op->func_hash->notrace_hash, rec->ip))) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -1929,18 +1905,51 @@ static struct ftrace_ops * ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; - /* Removed ops need to be tested first */ - if (removed_ops && removed_ops->tramp_hash) { - if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip)) + /* + * Need to check removed ops first. + * If they are being removed, and this rec has a tramp, + * and this rec is in the ops list, then it would be the + * one with the tramp. + */ + if (removed_ops) { + if (hash_contains_ip(ip, &removed_ops->old_hash)) return removed_ops; } + /* + * Need to find the current trampoline for a rec. + * Now, a trampoline is only attached to a rec if there + * was a single 'ops' attached to it. But this can be called + * when we are adding another op to the rec or removing the + * current one. Thus, if the op is being added, we can + * ignore it because it hasn't attached itself to the rec + * yet. That means we just need to find the op that has a + * trampoline and is not beeing added. + */ do_for_each_ftrace_op(op, ftrace_ops_list) { - if (!op->tramp_hash) + + if (!op->trampoline) + continue; + + /* + * If the ops is being added, it hasn't gotten to + * the point to be removed from this tree yet. + */ + if (op->flags & FTRACE_OPS_FL_ADDING) continue; - if (ftrace_lookup_ip(op->tramp_hash, rec->ip)) + /* + * If the ops is not being added and has a trampoline, + * then it must be the one that we want! + */ + if (hash_contains_ip(ip, op->func_hash)) + return op; + + /* If the ops is being modified, it may be in the old hash. */ + if ((op->flags & FTRACE_OPS_FL_MODIFYING) && + hash_contains_ip(ip, &op->old_hash)) return op; } while_for_each_ftrace_op(op); @@ -1952,10 +1961,11 @@ static struct ftrace_ops * ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { /* pass rec in as regs to have non-NULL val */ - if (ftrace_ops_test(op, rec->ip, rec)) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -2262,92 +2272,6 @@ void __weak arch_ftrace_update_code(int command) ftrace_run_stop_machine(command); } -static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) -{ - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int size, bits; - int ret; - - size = ops->nr_trampolines; - bits = 0; - /* - * Make the hash size about 1/2 the # found - */ - for (size /= 2; size; size >>= 1) - bits++; - - ops->tramp_hash = alloc_ftrace_hash(bits); - /* - * TODO: a failed allocation is going to screw up - * the accounting of what needs to be modified - * and not. For now, we kill ftrace if we fail - * to allocate here. But there are ways around this, - * but that will take a little more work. 
- */ - if (!ops->tramp_hash) - return -ENOMEM; - - do_for_each_ftrace_rec(pg, rec) { - if (ftrace_rec_count(rec) == 1 && - ftrace_ops_test(ops, rec->ip, rec)) { - - /* - * If another ops adds to a rec, the rec will - * lose its trampoline and never get it back - * until all ops are off of it. - */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - continue; - - /* This record had better have a trampoline */ - if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) - return -1; - - ret = add_hash_entry(ops->tramp_hash, rec->ip); - if (ret < 0) - return ret; - } - } while_for_each_ftrace_rec(); - - /* The number of recs in the hash must match nr_trampolines */ - if (FTRACE_WARN_ON(ops->tramp_hash->count != ops->nr_trampolines)) - pr_warn("count=%ld trampolines=%d\n", - ops->tramp_hash->count, - ops->nr_trampolines); - - return 0; -} - -static int ftrace_save_tramp_hashes(void) -{ - struct ftrace_ops *op; - int ret; - - /* - * Now that any trampoline is being used, we need to save the - * hashes for the ops that have them. This allows the mapping - * back from the record to the ops that has the trampoline to - * know what code is being replaced. Modifying code must always - * verify what it is changing. - */ - do_for_each_ftrace_op(op, ftrace_ops_list) { - - /* The tramp_hash is recreated each time. */ - free_ftrace_hash(op->tramp_hash); - op->tramp_hash = NULL; - - if (op->nr_trampolines) { - ret = ftrace_save_ops_tramp_hash(op); - if (ret) - return ret; - } - - } while_for_each_ftrace_op(op); - - return 0; -} - static void ftrace_run_update_code(int command) { int ret; @@ -2367,9 +2291,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - - ret = ftrace_save_tramp_hashes(); - FTRACE_WARN_ON(ret); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) @@ -2489,8 +2410,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; + /* The trampoline logic checks the old hashes */ + ops->old_hash.filter_hash = ops->func_hash->filter_hash; + ops->old_hash.notrace_hash = ops->func_hash->notrace_hash; + ftrace_run_update_code(command); + ops->old_hash.filter_hash = NULL; + ops->old_hash.notrace_hash = NULL; + + removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* @@ -3017,7 +2946,7 @@ static int t_show(struct seq_file *m, void *v) struct ftrace_ops *ops; ops = ftrace_find_tramp_ops_any(rec); - if (ops && ops->trampoline) + if (ops) seq_printf(m, "\ttramp: %pS", (void *)ops->trampoline); else -- cgit v1.2.3 From 3a630178fd5f30c285fd7016c5340a176b625913 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Aug 2014 10:52:04 -0700 Subject: tracing: generate RCU warnings even when tracepoints are disabled Dave Jones reported seeing a bug from one of my TLB tracepoints: http://lkml.kernel.org/r/20140806181801.GA4605@redhat.com I've been running these patches for months and never saw this. But, a big chunk of my testing, especially with all the debugging enabled, was in a vm where intel_idle doesn't work. On the systems where I was using intel_idle, I never had lockdep enabled and this tracepoint on at the same time. This patch ensures that whenever we have lockdep available, we do _some_ RCU activity at the site of the tracepoint, despite whether the tracepoint's condition matches or even if the tracepoint itself is completely disabled. This is a bit of a hack, but it is pretty self-contained. 
I confirmed that with this patch plus lockdep I get the same splat as Dave Jones did, but without enabling the tracepoint explicitly. Link: http://lkml.kernel.org/p/20140807175204.C257CAC5@viggo.jf.intel.com Signed-off-by: Dave Hansen Cc: Dave Hansen Cc: Dave Jones , Cc: paulmck@linux.vnet.ibm.com Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b1293f15f592..e08e21e5f601 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,6 +157,12 @@ extern void syscall_unregfunc(void); * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * When lockdep is enabled, we make sure to always do the RCU portions of + * the tracepoint code, regardless of whether tracing is on or we match the + * condition. This lets us find RCU issues triggered with tracepoints even + * when this tracepoint is off. This code has no purpose other than poking + * RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ @@ -167,6 +173,11 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ + if (IS_ENABLED(CONFIG_LOCKDEP)) { \ + rcu_read_lock_sched_notrace(); \ + rcu_dereference_sched(__tracepoint_##name.funcs);\ + rcu_read_unlock_sched_notrace(); \ + } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \ -- cgit v1.2.3 From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:15:02 +0800 Subject: nfs41: add a helper function to set layoutcommit after commit Track lwb in nfs_commit_data so that we can use it to setup layoutcommit in commit_done callback. 
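The lwb ("last write byte") tracked here is simply the maximum end offset over the requests in the commit list, so the layoutcommit range can cover everything written back. A standalone sketch of the same computation (illustrative plain C, not kernel code):

    #include <stdio.h>

    struct req { long long offset; long long bytes; };

    /* Highest byte boundary written by any request in the list */
    static long long get_lwb(const struct req *reqs, int n)
    {
    	long long lwb = 0;
    	int i;

    	for (i = 0; i < n; i++)
    		if (lwb < reqs[i].offset + reqs[i].bytes)
    			lwb = reqs[i].offset + reqs[i].bytes;
    	return lwb;
    }

    int main(void)
    {
    	/* requests covering [0,4096) and [8192,12288) => lwb = 12288 */
    	struct req reqs[] = { { 0, 4096 }, { 8192, 4096 } };

    	printf("lwb = %lld\n", get_lwb(reqs, 2));
    	return 0;
    }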
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 29 +++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 1 + fs/nfs/write.c | 15 +++++++++++++++ include/linux/nfs_xdr.h | 1 + 4 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..a6586cdee211 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1797,6 +1797,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) +{ + struct inode *inode = data->inode; + struct nfs_inode *nfsi = NFS_I(inode); + bool mark_as_dirty = false; + + spin_lock(&inode->i_lock); + if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + mark_as_dirty = true; + dprintk("%s: Set layoutcommit for inode %lu ", + __func__, inode->i_ino); + } + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { + /* references matched in nfs4_layoutcommit_release */ + pnfs_get_lseg(data->lseg); + } + if (data->lwb > nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = data->lwb; + spin_unlock(&inode->i_lock); + dprintk("%s: lseg %p end_pos %llu\n", + __func__, data->lseg, nfsi->layout->plh_lwb); + + /* if pnfs_layoutcommit_inode() runs between inode locks, the next one + * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ + if (mark_as_dirty) + mark_inode_dirty_sync(inode); +} +EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); + void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) { struct nfs_server *nfss = NFS_SERVER(data->args.inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..79c63114ce77 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_pgio_header *); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..3c5638f381cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1538,6 +1538,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, } EXPORT_SYMBOL_GPL(nfs_initiate_commit); +static loff_t nfs_get_lwb(struct list_head *head) +{ + loff_t lwb = 0; + struct nfs_page *req; + + list_for_each_entry(req, head, wb_list) + if (lwb < (req_offset(req) + req->wb_bytes)) + lwb = req_offset(req) + req->wb_bytes; + + return lwb; +} + /* * Set up the argument/result storage required for the RPC call. 
*/ @@ -1557,6 +1569,9 @@ void nfs_init_commit(struct nfs_commit_data *data, data->inode = inode; data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ + /* only set lwb for pnfs commit */ + if (lseg) + data->lwb = nfs_get_lwb(&data->pages); data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; data->dreq = cinfo->dreq; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0040629894df..e563b2c976ef 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1328,6 +1328,7 @@ struct nfs_commit_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_index; + loff_t lwb; const struct rpc_call_ops *mds_ops; const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); -- cgit v1.2.3 From 5f919c9f10c1cf821ee5f414683214a361a1b98c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Aug 2014 11:09:25 -0500 Subject: pnfs: allow splicing pre-encoded pages into the layoutcommit args Currently there is no XDR buffer space allocated for the per-layout driver layoutcommit payload, which leads to server buffer overflows in the blocklayout driver even under simple workloads. As we can't do per-layout sizes for XDR operations we'll have to splice a previously encoded list of pages into the XDR stream, similar to how we handle ACL buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 18 +++++++++++++----- fs/nfs/pnfs.c | 15 +++++++++++++++ fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_xdr.h | 3 +++ 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..f2cd957adb90 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -395,7 +395,10 @@ static int nfs4_stat_to_errno(int); 2 /* last byte written */ + \ 1 /* nt_timechanged (false) */ + \ 1 /* layoutupdate4 layout type */ + \ - 1 /* NULL filelayout layoutupdate4 payload */) + 1 /* layoutupdate4 opaqueue len */) + /* the actual content of layoutupdate4 should + be allocated by drivers and spliced in + using xdr_write_pages */ #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ @@ -1990,7 +1993,7 @@ encode_layoutget(struct xdr_stream *xdr, static int encode_layoutcommit(struct xdr_stream *xdr, struct inode *inode, - const struct nfs4_layoutcommit_args *args, + struct nfs4_layoutcommit_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -2011,11 +2014,16 @@ encode_layoutcommit(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) + if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( NFS_I(inode)->layout, xdr, args); - else - encode_uint32(xdr, 0); /* no layout-type payload */ + } else { + encode_uint32(xdr, args->layoutupdate_len); + if (args->layoutupdate_pages) { + xdr_write_pages(xdr, args->layoutupdate_pages, 0, + args->layoutupdate_len); + } + } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 57b5728e0b8e..8827ab130ed3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1854,6 +1854,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) int pnfs_layoutcommit_inode(struct 
inode *inode, bool sync) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; @@ -1904,6 +1905,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) data->args.lastbytewritten = end_pos - 1; data->res.server = NFS_SERVER(inode); + if (ld->prepare_layoutcommit) { + status = ld->prepare_layoutcommit(&data->args); + if (status) { + spin_lock(&inode->i_lock); + if (end_pos < nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = end_pos; + spin_unlock(&inode->i_lock); + put_rpccred(data->cred); + set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); + goto clear_layoutcommitting; + } + } + + status = nfs4_proc_layoutcommit(data, sync); out: if (status) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1dd8a5e96c9f..8835b5a320cc 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -128,8 +128,8 @@ struct pnfs_layoutdriver_type { const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); - - void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); + void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e563b2c976ef..f4092c6b90fb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -279,6 +279,9 @@ struct nfs4_layoutcommit_args { __u64 lastbytewritten; struct inode *inode; const u32 *bitmask; + size_t layoutupdate_len; + struct page *layoutupdate_page; + struct page **layoutupdate_pages; }; struct nfs4_layoutcommit_res { -- cgit v1.2.3 From b954d83421d51d822c42e5ab7b65069b25ad3005 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Sep 2014 15:01:02 +0200 Subject: net: bpf: only build bpf_jit_binary_{alloc, free}() when jit selected Since BPF JIT depends on the availability of module_alloc() and module_free() helpers (HAVE_BPF_JIT and MODULES), we better build that code only in case we have BPF_JIT in our config enabled, just like with other JIT code. Fixes builds for arm/marzen_defconfig and sh/rsk7269_defconfig. ==================== kernel/built-in.o: In function `bpf_jit_binary_alloc': /home/cwang/linux/kernel/bpf/core.c:144: undefined reference to `module_alloc' kernel/built-in.o: In function `bpf_jit_binary_free': /home/cwang/linux/kernel/bpf/core.c:164: undefined reference to `module_free' make: *** [vmlinux] Error 1 ==================== Reported-by: Fengguang Wu Fixes: 738cbe72adc5 ("net: bpf: consolidate JIT binary allocator") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/filter.h | 78 +++++++++++++++++++++++++------------------------- kernel/bpf/core.c | 2 ++ 2 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4b59edead908..1a0bc6d134d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -4,12 +4,18 @@ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ +#include + #include #include #include +#include +#include #include -#include + #include + +#include #include struct sk_buff; @@ -363,14 +369,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); -typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); - -struct bpf_binary_header * -bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, - unsigned int alignment, - bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_jit_binary_free(struct bpf_binary_header *hdr); - static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); @@ -393,6 +391,38 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct bpf_prog *fp); +#ifdef CONFIG_BPF_JIT +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%pK\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, + 16, 1, image, proglen, false); +} +#else +static inline void bpf_jit_compile(struct bpf_prog *fp) +{ +} + +static inline void bpf_jit_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_free(fp); +} +#endif /* CONFIG_BPF_JIT */ + #define BPF_ANC BIT(15) static inline u16 bpf_anc_helper(const struct sock_filter *ftest) @@ -440,36 +470,6 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, return bpf_internal_load_pointer_neg_helper(skb, k, size); } -#ifdef CONFIG_BPF_JIT -#include -#include -#include - -void bpf_jit_compile(struct bpf_prog *fp); -void bpf_jit_free(struct bpf_prog *fp); - -static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, - u32 pass, void *image) -{ - pr_err("flen=%u proglen=%u pass=%u image=%pK\n", - flen, proglen, pass, image); - if (image) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, - 16, 1, image, proglen, false); -} -#else -#include - -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - -static inline void bpf_jit_free(struct bpf_prog *fp) -{ - bpf_prog_unlock_free(fp); -} -#endif /* CONFIG_BPF_JIT */ - static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8ee520f0ec70..8b7002488251 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -128,6 +128,7 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +#ifdef CONFIG_BPF_JIT struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -163,6 +164,7 @@ void bpf_jit_binary_free(struct 
bpf_binary_header *hdr) { module_free(NULL, hdr); } +#endif /* CONFIG_BPF_JIT */ /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs -- cgit v1.2.3 From 6314b6796e3c070d4c8086b08dfd453a0aeac4cf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Sep 2014 23:37:49 -0700 Subject: clk: Don't hold prepare_lock across debugfs creation Rob Clark reports a lockdep splat that involves the prepare_lock chained with the mmap semaphore. ====================================================== [ INFO: possible circular locking dependency detected ] 3.17.0-rc1-00050-g07a489b #802 Tainted: G W ------------------------------------------------------- Xorg.bin/5413 is trying to acquire lock: (prepare_lock){+.+.+.}, at: [] clk_prepare_lock+0x88/0xfc but task is already holding lock: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (qcom_iommu_lock){+.+...}: [] qcom_iommu_map+0x28/0x450 [] iommu_map+0xc8/0x12c [] msm_iommu_map+0xb4/0x130 [] msm_gem_get_iova_locked+0x9c/0xe8 [] msm_gem_get_iova+0x4c/0x64 [] mdp4_kms_init+0x4c4/0x6c0 [] msm_load+0x2ac/0x34c [] drm_dev_register+0xac/0x108 [] drm_platform_init+0x50/0xf0 [] try_to_bring_up_master.part.3+0xc8/0x108 [] component_master_add_with_match+0xa8/0x104 [] msm_pdev_probe+0x64/0x70 [] platform_drv_probe+0x2c/0x60 [] driver_probe_device+0x108/0x234 [] bus_for_each_drv+0x64/0x98 [] device_attach+0x78/0x8c [] bus_probe_device+0x88/0xac [] deferred_probe_work_func+0x68/0x9c [] process_one_work+0x1a0/0x40c [] worker_thread+0x44/0x4d8 [] kthread+0xd8/0xec [] ret_from_fork+0x14/0x2c -> #3 (&dev->struct_mutex){+.+.+.}: [] drm_gem_mmap+0x38/0xd0 [] msm_gem_mmap+0xc/0x5c [] mmap_region+0x35c/0x6c8 [] do_mmap_pgoff+0x314/0x398 [] vm_mmap_pgoff+0x84/0xb4 [] SyS_mmap_pgoff+0x94/0xbc [] ret_fast_syscall+0x0/0x48 -> #2 (&mm->mmap_sem){++++++}: [] filldir64+0x68/0x180 [] dcache_readdir+0x188/0x22c [] iterate_dir+0x9c/0x11c [] SyS_getdents64+0x78/0xe8 [] ret_fast_syscall+0x0/0x48 -> #1 (&sb->s_type->i_mutex_key#3){+.+.+.}: [] __create_file+0x58/0x1dc [] debugfs_create_dir+0x1c/0x24 [] clk_debug_create_subtree+0x20/0x170 [] clk_debug_init+0xec/0x14c [] do_one_initcall+0x8c/0x1c8 [] kernel_init_freeable+0x13c/0x1dc [] kernel_init+0x8/0xe8 [] ret_from_fork+0x14/0x2c -> #0 (prepare_lock){+.+.+.}: [] mutex_lock_nested+0x70/0x3e8 [] clk_prepare_lock+0x88/0xfc [] clk_prepare+0xc/0x24 [] __enable_clocks.isra.4+0x18/0xa4 [] __flush_iotlb_va+0xe0/0x114 [] qcom_iommu_unmap+0xac/0x1f0 [] iommu_unmap+0x9c/0xe8 [] msm_iommu_unmap+0x64/0x84 [] msm_gem_free_object+0x11c/0x338 [] drm_gem_object_handle_unreference_unlocked+0xfc/0x130 [] drm_gem_object_release_handle+0x50/0x68 [] idr_for_each+0xa8/0xdc [] drm_gem_release+0x1c/0x28 [] drm_release+0x370/0x428 [] __fput+0x98/0x1e8 [] task_work_run+0xb0/0xfc [] do_exit+0x2ec/0x948 [] do_group_exit+0x4c/0xb8 [] get_signal+0x28c/0x6ac [] do_signal+0xc4/0x3e4 [] do_work_pending+0xb4/0xc4 [] work_pending+0xc/0x20 other info that might help us debug this: Chain exists of: prepare_lock --> &dev->struct_mutex --> qcom_iommu_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(qcom_iommu_lock); lock(&dev->struct_mutex); lock(qcom_iommu_lock); lock(prepare_lock); *** DEADLOCK *** 3 locks held by Xorg.bin/5413: #0: (drm_global_mutex){+.+.+.}, at: [] drm_release+0x34/0x428 #1: (&dev->struct_mutex){+.+.+.}, at: [] 
drm_gem_object_handle_unreference_unlocked+0xcc/0x130 #2: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 stack backtrace: CPU: 1 PID: 5413 Comm: Xorg.bin Tainted: G W 3.17.0-rc1-00050-g07a489b #802 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xb8) [] (dump_stack) from [] (print_circular_bug+0x218/0x340) [] (print_circular_bug) from [] (__lock_acquire+0x1d24/0x20b8) [] (__lock_acquire) from [] (lock_acquire+0x9c/0xbc) [] (lock_acquire) from [] (mutex_lock_nested+0x70/0x3e8) [] (mutex_lock_nested) from [] (clk_prepare_lock+0x88/0xfc) [] (clk_prepare_lock) from [] (clk_prepare+0xc/0x24) [] (clk_prepare) from [] (__enable_clocks.isra.4+0x18/0xa4) [] (__enable_clocks.isra.4) from [] (__flush_iotlb_va+0xe0/0x114) [] (__flush_iotlb_va) from [] (qcom_iommu_unmap+0xac/0x1f0) [] (qcom_iommu_unmap) from [] (iommu_unmap+0x9c/0xe8) [] (iommu_unmap) from [] (msm_iommu_unmap+0x64/0x84) [] (msm_iommu_unmap) from [] (msm_gem_free_object+0x11c/0x338) [] (msm_gem_free_object) from [] (drm_gem_object_handle_unreference_unlocked+0xfc/0x130) [] (drm_gem_object_handle_unreference_unlocked) from [] (drm_gem_object_release_handle+0x50/0x68) [] (drm_gem_object_release_handle) from [] (idr_for_each+0xa8/0xdc) [] (idr_for_each) from [] (drm_gem_release+0x1c/0x28) [] (drm_gem_release) from [] (drm_release+0x370/0x428) [] (drm_release) from [] (__fput+0x98/0x1e8) [] (__fput) from [] (task_work_run+0xb0/0xfc) [] (task_work_run) from [] (do_exit+0x2ec/0x948) [] (do_exit) from [] (do_group_exit+0x4c/0xb8) [] (do_group_exit) from [] (get_signal+0x28c/0x6ac) [] (get_signal) from [] (do_signal+0xc4/0x3e4) [] (do_signal) from [] (do_work_pending+0xb4/0xc4) [] (do_work_pending) from [] (work_pending+0xc/0x20) We can break this chain if we don't hold the prepare_lock while creating debugfs directories. We only hold the prepare_lock right now because we're traversing the clock tree recursively and we don't want the hierarchy to change during the traversal. Replacing this traversal with a simple linked list walk allows us to only grab a list lock instead of the prepare_lock, thus breaking the lock chain. Signed-off-by: Stephen Boyd Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 73 +++++++++++++++++---------------------------- include/linux/clk-private.h | 1 + 2 files changed, 28 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index b76fa69b44cb..8ca28189e4e9 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -100,6 +100,8 @@ static void clk_enable_unlock(unsigned long flags) static struct dentry *rootdir; static int inited = 0; +static DEFINE_MUTEX(clk_debug_lock); +static HLIST_HEAD(clk_debug_list); static struct hlist_head *all_lists[] = { &clk_root_list, @@ -300,28 +302,6 @@ out: return ret; } -/* caller must hold prepare_lock */ -static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) -{ - struct clk *child; - int ret = -EINVAL;; - - if (!clk || !pdentry) - goto out; - - ret = clk_debug_create_one(clk, pdentry); - - if (ret) - goto out; - - hlist_for_each_entry(child, &clk->children, child_node) - clk_debug_create_subtree(child, pdentry); - - ret = 0; -out: - return ret; -} - /** * clk_debug_register - add a clk node to the debugfs clk tree * @clk: the clk being added to the debugfs clk tree @@ -329,20 +309,21 @@ out: * Dynamically adds a clk to the debugfs clk tree if debugfs has been * initialized. 
Otherwise it bails out early since the debugfs clk tree * will be created lazily by clk_debug_init as part of a late_initcall. - * - * Caller must hold prepare_lock. Only clk_init calls this function (so - * far) so this is taken care. */ static int clk_debug_register(struct clk *clk) { int ret = 0; + mutex_lock(&clk_debug_lock); + hlist_add_head(&clk->debug_node, &clk_debug_list); + if (!inited) - goto out; + goto unlock; - ret = clk_debug_create_subtree(clk, rootdir); + ret = clk_debug_create_one(clk, rootdir); +unlock: + mutex_unlock(&clk_debug_lock); -out: return ret; } @@ -353,12 +334,18 @@ out: * Dynamically removes a clk and all it's children clk nodes from the * debugfs clk tree if clk->dentry points to debugfs created by * clk_debug_register in __clk_init. - * - * Caller must hold prepare_lock. */ static void clk_debug_unregister(struct clk *clk) { + mutex_lock(&clk_debug_lock); + if (!clk->dentry) + goto out; + + hlist_del_init(&clk->debug_node); debugfs_remove_recursive(clk->dentry); + clk->dentry = NULL; +out: + mutex_unlock(&clk_debug_lock); } struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode, @@ -415,17 +402,12 @@ static int __init clk_debug_init(void) if (!d) return -ENOMEM; - clk_prepare_lock(); - - hlist_for_each_entry(clk, &clk_root_list, child_node) - clk_debug_create_subtree(clk, rootdir); - - hlist_for_each_entry(clk, &clk_orphan_list, child_node) - clk_debug_create_subtree(clk, rootdir); + mutex_lock(&clk_debug_lock); + hlist_for_each_entry(clk, &clk_debug_list, debug_node) + clk_debug_create_one(clk, rootdir); inited = 1; - - clk_prepare_unlock(); + mutex_unlock(&clk_debug_lock); return 0; } @@ -2087,14 +2069,16 @@ void clk_unregister(struct clk *clk) { unsigned long flags; - if (!clk || WARN_ON_ONCE(IS_ERR(clk))) - return; + if (!clk || WARN_ON_ONCE(IS_ERR(clk))) + return; + + clk_debug_unregister(clk); clk_prepare_lock(); if (clk->ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->name); - goto out; + return; } /* * Assign empty clock ops for consumers that might still hold @@ -2113,16 +2097,13 @@ void clk_unregister(struct clk *clk) clk_set_parent(child, NULL); } - clk_debug_unregister(clk); - hlist_del_init(&clk->child_node); if (clk->prepare_count) pr_warn("%s: unregistering prepared clock: %s\n", __func__, clk->name); - kref_put(&clk->ref, __clk_release); -out: + clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index efbf70b9fd84..4ed34105c371 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -48,6 +48,7 @@ struct clk { unsigned long accuracy; struct hlist_head children; struct hlist_node child_node; + struct hlist_node debug_node; unsigned int notifier_count; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; -- cgit v1.2.3 From 184c3fc3f52fb75800deb76deffb70907d1f76ea Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 11 Sep 2014 09:47:23 +0930 Subject: moduleparam: Resolve missing-field-initializer warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve a missing-field-initializer warning, that is produced by every reference to module_param_call, by using designated initialization for the first field. That is enough to silence the complaint. The message is only seen when doing a W=2 build. I happened to be using gcc 4.8.3, but I think most versions would produce the warning when it is enabled. 
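A minimal sketch of the two initialization styles (set_fn and get_fn are placeholder names; the hunk below relies on .flags being the first member of struct kernel_param_ops):

	/* Purely positional: W=2 warns that the trailing fields,
	 * e.g. .free, have no initializers. */
	static struct kernel_param_ops ops_warns =
		{ 0, (void *)set_fn, (void *)get_fn };

	/* One designated initializer quiets the warning; C99 lets the
	 * following positional initializers continue from .set onward. */
	static struct kernel_param_ops ops_quiet =
		{ .flags = 0, (void *)set_fn, (void *)get_fn };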
It can either be silenced by using even a single designated initializer as I did here, or providing values for all of the fields. Because of the number of references to the macro, this change silences many warnings in W=2 builds. One instance of the full warning message looks like this: /home/share/git/nn-mdr/include/linux/moduleparam.h:198:16: warning: missing initializer for field ‘free’ of ‘struct kernel_param_ops’ [-Wmissing-field-initializers] static struct kernel_param_ops __param_ops_##name = \ ^ /home/share/git/nn-mdr/fs/fuse/inode.c:35:1: note: in expansion of macro ‘module_param_call’ module_param_call(max_user_bgreq, set_global_limit, param_get_uint, ^ /home/share/git/nn-mdr/include/linux/moduleparam.h:56:9: note: ‘free’ declared here void (*free)(void *arg); Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 593501996574..b43f4752304e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -224,7 +224,7 @@ struct kparam_array /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ static struct kernel_param_ops __param_ops_##name = \ - { 0, (void *)set, (void *)get }; \ + { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) -- cgit v1.2.3 From 3d598f47e804a77208c6bb0a454123018e2f2281 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:12 +0300 Subject: dmaengine: dw: move dw_dmac.h to where it belongs to There is a common storage for platform-data-related structures and definitions inside the kernel source tree. The patch moves the file from include/linux to include/linux/platform_data and renames it accordingly. The users are also updated.
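For each user the conversion is a mechanical one-line include change, e.g.:

	-#include <linux/dw_dmac.h>
	+#include <linux/platform_data/dma-dw.h>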
Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar [For the arch/avr32/.* and .*sound/atmel.*] Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- arch/avr32/mach-at32ap/at32ap700x.c | 2 +- arch/avr32/mach-at32ap/include/mach/atmel-mci.h | 2 +- drivers/dma/dw/internal.h | 2 +- drivers/dma/dw/regs.h | 6 +- include/linux/dw_dmac.h | 111 ------------------------ include/linux/platform_data/dma-dw.h | 111 ++++++++++++++++++++++++ include/sound/atmel-abdac.h | 2 +- include/sound/atmel-ac97c.h | 2 +- sound/atmel/abdac.c | 2 +- sound/atmel/ac97c.c | 2 +- 11 files changed, 122 insertions(+), 122 deletions(-) delete mode 100644 include/linux/dw_dmac.h create mode 100644 include/linux/platform_data/dma-dw.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index aefa94841ff3..a10072963889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7875,7 +7875,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar M: Andy Shevchenko S: Maintained -F: include/linux/dw_dmac.h +F: include/linux/platform_data/dma-dw.h F: drivers/dma/dw/ SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index db85b5ec3351..2a1aa712c6e7 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h index 4bba58561d5c..11d7f4b28dc8 100644 --- a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h +++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h @@ -1,7 +1,7 @@ #ifndef __MACH_ATMEL_MCI_H #define __MACH_ATMEL_MCI_H -#include +#include /** * struct mci_dma_data - DMA data for MCI interface diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 32667f9e0dda..43cc1dfad5c9 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -12,7 +12,7 @@ #define _DW_DMAC_INTERNAL_H #include -#include +#include #include "regs.h" diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index bb98d3e91e8b..af02439155e9 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -11,7 +11,7 @@ #include #include -#include +#include #define DW_DMA_MAX_NR_CHANNELS 8 #define DW_DMA_MAX_NR_REQUESTS 16 @@ -161,7 +161,7 @@ struct dw_dma_regs { #define DWC_CTLH_DONE 0x00001000 #define DWC_CTLH_BLOCK_TS_MASK 0x00000fff -/* Bitfields in CFG_LO. Platform-configurable bits are in */ +/* Bitfields in CFG_LO. Platform-configurable bits are in */ #define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ #define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ #define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ @@ -172,7 +172,7 @@ struct dw_dma_regs { #define DWC_CFGL_RELOAD_SAR (1 << 30) #define DWC_CFGL_RELOAD_DAR (1 << 31) -/* Bitfields in CFG_HI. Platform-configurable bits are in */ +/* Bitfields in CFG_HI. 
Platform-configurable bits are in */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h deleted file mode 100644 index 68b4024184de..000000000000 --- a/include/linux/dw_dmac.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Driver for the Synopsys DesignWare DMA Controller - * - * Copyright (C) 2007 Atmel Corporation - * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef DW_DMAC_H -#define DW_DMAC_H - -#include - -/** - * struct dw_dma_slave - Controller-specific information about a slave - * - * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. - */ -struct dw_dma_slave { - struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; - u8 src_master; - u8 dst_master; -}; - -/** - * struct dw_dma_platform_data - Controller configuration parameters - * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. - * @chan_allocation_order: Allocate channels starting from 0 or 7 - * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. - * @block_size: Maximum block size supported by the controller - * @nr_masters: Number of AHB masters supported by the controller - * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - */ -struct dw_dma_platform_data { - unsigned int nr_channels; - bool is_private; -#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ - unsigned char chan_allocation_order; -#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ -#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ - unsigned char chan_priority; - unsigned short block_size; - unsigned char nr_masters; - unsigned char data_width[4]; -}; - -/* bursts size */ -enum dw_dma_msize { - DW_DMA_MSIZE_1, - DW_DMA_MSIZE_4, - DW_DMA_MSIZE_8, - DW_DMA_MSIZE_16, - DW_DMA_MSIZE_32, - DW_DMA_MSIZE_64, - DW_DMA_MSIZE_128, - DW_DMA_MSIZE_256, -}; - -/* Platform-configurable bits in CFG_HI */ -#define DWC_CFGH_FCMODE (1 << 0) -#define DWC_CFGH_FIFO_MODE (1 << 1) -#define DWC_CFGH_PROTCTL(x) ((x) << 2) -#define DWC_CFGH_SRC_PER(x) ((x) << 7) -#define DWC_CFGH_DST_PER(x) ((x) << 11) - -/* Platform-configurable bits in CFG_LO */ -#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ -#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) -#define DWC_CFGL_LOCK_CH_XACT (2 << 12) -#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ -#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) -#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) -#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ -#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ -#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ -#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ - -/* DMA API extensions */ -struct 
dw_cyclic_desc { - struct dw_desc **desc; - unsigned long periods; - void (*period_callback)(void *param); - void *period_callback_param; -}; - -struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, - dma_addr_t buf_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction); -void dw_dma_cyclic_free(struct dma_chan *chan); -int dw_dma_cyclic_start(struct dma_chan *chan); -void dw_dma_cyclic_stop(struct dma_chan *chan); - -dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); - -dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); - -#endif /* DW_DMAC_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h new file mode 100644 index 000000000000..68b4024184de --- /dev/null +++ b/include/linux/platform_data/dma-dw.h @@ -0,0 +1,111 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DW_DMAC_H +#define DW_DMAC_H + +#include + +/** + * struct dw_dma_slave - Controller-specific information about a slave + * + * @dma_dev: required DMA master device. Depricated. + * @bus_id: name of this device channel, not just a device name since + * devices may have more than one channel e.g. "foo_tx" + * @cfg_hi: Platform-specific initializer for the CFG_HI register + * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_master: src master for transfers on allocated channel. + * @dst_master: dest master for transfers on allocated channel. + */ +struct dw_dma_slave { + struct device *dma_dev; + u32 cfg_hi; + u32 cfg_lo; + u8 src_master; + u8 dst_master; +}; + +/** + * struct dw_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @is_private: The device channels should be marked as private and not for + * by the general purpose DMA channel allocator. + * @chan_allocation_order: Allocate channels starting from 0 or 7 + * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. 
+ * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master + * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + */ +struct dw_dma_platform_data { + unsigned int nr_channels; + bool is_private; +#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ +#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ + unsigned char chan_allocation_order; +#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ +#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; + unsigned short block_size; + unsigned char nr_masters; + unsigned char data_width[4]; +}; + +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* Platform-configurable bits in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Platform-configurable bits in CFG_LO */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ + +/* DMA API extensions */ +struct dw_cyclic_desc { + struct dw_desc **desc; + unsigned long periods; + void (*period_callback)(void *param); + void *period_callback_param; +}; + +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_transfer_direction direction); +void dw_dma_cyclic_free(struct dma_chan *chan); +int dw_dma_cyclic_start(struct dma_chan *chan); +void dw_dma_cyclic_stop(struct dma_chan *chan); + +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); + +dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); + +#endif /* DW_DMAC_H */ diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h index edff6a8ba1b5..a8f735d677fa 100644 --- a/include/sound/atmel-abdac.h +++ b/include/sound/atmel-abdac.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H #define __INCLUDE_SOUND_ATMEL_ABDAC_H -#include +#include /** * struct atmel_abdac_pdata - board specific ABDAC configuration diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h index 00e6c289a936..f2a1cdc37661 100644 --- a/include/sound/atmel-ac97c.h +++ b/include/sound/atmel-ac97c.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_AC97C_H #define __INCLUDE_SOUND_ATMEL_AC97C_H -#include +#include #define AC97C_CAPTURE 0x01 #define AC97C_PLAYBACK 0x02 diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c index edf2ca72d518..154a7c44e38d 100644 --- a/sound/atmel/abdac.c +++ b/sound/atmel/abdac.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index a04d23174dc2..1dfb35afef8f 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include -- cgit 
v1.2.3 From 7e1e2f27c5508518e58e5cbb11e26cbb815f4c56 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:14 +0300 Subject: dmaengine: dw: convert dw_dma_slave to use explicit HS interfaces Instead of exposing the possibility to set the DMA registers CFG_HI and CFG_LO directly, restrict users to providing the handshake interfaces explicitly. Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- arch/avr32/mach-at32ap/at32ap700x.c | 15 +++++---------- drivers/dma/dw/core.c | 4 ++-- include/linux/platform_data/dma-dw.h | 10 ++++------ 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index ec7be287a97e..37b75602adf6 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1356,10 +1356,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) goto fail; slave->sdata.dma_dev = &dw_dmac0_device.dev; - slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0) - | DWC_CFGH_DST_PER(1)); - slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL - | DWC_CFGL_HS_SRC_POL); + slave->sdata.src_id = 0; + slave->sdata.dst_id = 1; slave->sdata.src_master = 1; slave->sdata.dst_master = 0; @@ -2054,8 +2052,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for capture should be configured. */ if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; - rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3); - rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_id = 3; rx_dws->src_master = 0; rx_dws->dst_master = 1; } @@ -2063,8 +2060,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; - tx_dws->cfg_hi = DWC_CFGH_DST_PER(4); - tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + tx_dws->dst_id = 4; tx_dws->src_master = 0; tx_dws->dst_master = 1; } @@ -2136,8 +2132,7 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws = &data->dws; dws->dma_dev = &dw_dmac0_device.dev; - dws->cfg_hi = DWC_CFGH_DST_PER(2); - dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + dws->dst_id = 2; dws->src_master = 0; dws->dst_master = 1; diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1af731b83b3f..0a9c052d437c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -155,8 +155,8 @@ static void dwc_initialize(struct dw_dma_chan *dwc) */ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - cfghi = dws->cfg_hi; - cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; + cfghi |= DWC_CFGH_DST_PER(dws->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dws->src_id); } else { if (dwc->direction == DMA_MEM_TO_DEV) cfghi = DWC_CFGH_DST_PER(dwc->request_line); diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 68b4024184de..bc411a1bf8e7 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -17,17 +17,15 @@ * struct dw_dma_slave - Controller-specific information about a slave * * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g.
"foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_id: src request line + * @dst_id: dst request line * @src_master: src master for transfers on allocated channel. * @dst_master: dest master for transfers on allocated channel. */ struct dw_dma_slave { struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; + u8 src_id; + u8 dst_id; u8 src_master; u8 dst_master; }; -- cgit v1.2.3 From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Sep 2014 22:55:35 +0300 Subject: cfg80211: add WMM traffic stream API Add nl80211 and driver API to validate, add and delete traffic streams with appropriate settings. The API calls for userspace doing the action frame handshake with the peer, and then allows only to set up the parameters in the driver. To avoid setting up a session only to tear it down again, the validate API is provided, but the real usage later can still fail so userspace must be prepared for that. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++ include/net/cfg80211.h | 23 ++++++++- include/uapi/linux/nl80211.h | 28 +++++++++++ net/wireless/nl80211.c | 109 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 31 ++++++++++++ net/wireless/trace.h | 45 ++++++++++++++++++ 6 files changed, 239 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 61a09f0644aa..b1be39c76931 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_FIRST_TSPEC_TSID 8 #define IEEE80211_NUM_TIDS 16 +/* number of user priorities 802.11 uses */ +#define IEEE80211_NUM_UPS 8 + #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c2c710c14a50..a13beab123dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2318,6 +2318,17 @@ struct cfg80211_qos_map { * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the * given interface This is used e.g. for dynamic HT 20/40 MHz channel width * changes during the lifetime of the BSS. + * + * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device + * with the given parameters; action frame exchange has been handled by + * userspace so this just has to modify the TX path to take the TS into + * account. + * If the admitted time is 0 just validate the parameters to make sure + * the session can be created at all; it is valid to just always return + * success for that but that may result in inefficient behaviour (handshake + * with the peer followed by immediate teardown when the addition is later + * rejected) + * @del_tx_ts: remove an existing TX TS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2558,6 +2569,12 @@ struct cfg80211_ops { int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef); + + int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer, u8 user_prio, + u16 admitted_time); + int (*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer); }; /* @@ -2604,9 +2621,13 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. 
* @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. */ enum wiphy_flags { - /* use hole at 0 */ + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION = BIT(0), /* use hole at 1 */ /* use hole at 2 */ WIPHY_FLAG_NETNS_OK = BIT(3), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 29c4399e08b9..e5b8caf5e466 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -722,6 +722,22 @@ * QoS mapping is relevant for IP packets, it is only valid during an * association. This is cleared on disassociation and AP restart. * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -893,6 +909,9 @@ enum nl80211_commands { NL80211_CMD_SET_QOS_MAP, + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1611,6 +1630,11 @@ enum nl80211_commands { * drivers to indicate dynack capability. Dynack is automatically disabled * setting valid value for coverage class. 
* + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1957,6 +1981,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_DYN_ACK, + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e9fbd4f4ddb0..ab7ee4893e40 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -391,6 +391,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, + [NL80211_ATTR_TSID] = { .type = NLA_U8 }, + [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, + [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -1510,6 +1513,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); + if (rdev->wiphy.flags & + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION) + CMD(add_tx_ts, ADD_TX_TS); } /* add into the if now */ #undef CMD @@ -9390,6 +9396,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb, return ret; } +static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid, up; + u16 admitted_time = 0; + int err; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_USER_PRIO]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + if (tsid >= IEEE80211_NUM_TIDS) + return -EINVAL; + + up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]); + if (up >= IEEE80211_NUM_UPS) + return -EINVAL; + + /* WMM uses TIDs 0-7 even for TSPEC */ + if (tsid < IEEE80211_FIRST_TSPEC_TSID) { + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EINVAL; + } else { + /* TODO: handle 802.11 TSPEC/admission control + * need more attributes for that (e.g. 
BA session requirement) + */ + return -EINVAL; + } + + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) { + admitted_time = + nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]); + if (!admitted_time) + return -EINVAL; + } + + wdev_lock(wdev); + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (wdev->current_bss) + break; + err = -ENOTCONN; + goto out; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); + + out: + wdev_unlock(wdev); + return err; +} + +static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid; + int err; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + wdev_lock(wdev); + err = rdev_del_tx_ts(rdev, dev, tsid, peer); + wdev_unlock(wdev); + + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -10147,6 +10240,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ADD_TX_TS, + .doit = nl80211_add_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_TX_TS, + .doit = nl80211_del_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 56c2240c30ce..f6d457d6a558 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -915,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_add_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer, + u8 user_prio, u16 admitted_time) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + if (rdev->ops->add_tx_ts) + ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + +static inline int +rdev_del_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer); + if (rdev->ops->del_tx_ts) + ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0c524cd76c83..625a6e6d1168 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1896,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(rdev_add_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time), + TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + 
__field(u8, tsid) + __field(u8, user_prio) + __field(u16, admitted_time) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + __entry->user_prio = user_prio; + __entry->admitted_time = admitted_time; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), + __entry->tsid, __entry->user_prio, __entry->admitted_time) +); + +TRACE_EVENT(rdev_del_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer), + TP_ARGS(wiphy, netdev, tsid, peer), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tsid) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:22 -0700 Subject: leds: Reorder include directives Reorder include directives so that they are arranged in alphabetical order. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 13 +++++++------ drivers/leds/led-core.c | 3 ++- include/linux/leds.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..4bd4572efe6e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -9,16 +9,17 @@ * published by the Free Software Foundation. */ -#include -#include +#include +#include +#include #include +#include +#include #include +#include +#include #include -#include #include -#include -#include -#include #include "leds.h" static struct class *leds_class; diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..50b579ad948e 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -12,10 +12,11 @@ */ #include +#include #include #include +#include #include -#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..4be2d7623d9e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include #include +#include #include #include -- cgit v1.2.3 From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:23 -0700 Subject: leds: make brightness type consistent across whole subsystem Documentation states that the brightness unit type is enum led_brightness, and this is the type used by the LED API functions. Adjust the type of brightness variables in the struct led_classdev accordingly.
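With the fields typed this way, a driver-side setter lines up with the documented unit; a small sketch (my_led_priv and the register write are illustrative only):

	struct my_led_priv {
		struct led_classdev cdev;
		void __iomem *base;
	};

	static void my_led_brightness_set(struct led_classdev *cdev,
					  enum led_brightness value)
	{
		struct my_led_priv *priv =
			container_of(cdev, struct my_led_priv, cdev);

		/* value ranges from LED_OFF up to cdev->max_brightness */
		writel(value, priv->base);
	}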
Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4be2d7623d9e..f2e1cbc25705 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From 974a70bdecea5296db1b643e4046ef208e99c592 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 12 Sep 2014 09:32:41 +0800 Subject: usb: gadget: udc-core: add utility for bus reset The udc driver can notify the udc core that a bus reset has occurred by calling this utility; the core will pass this information on to the gadget driver and update the gadget state accordingly. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/udc-core.c | 17 +++++++++++++++ include/linux/usb/gadget.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index b0d98172bc07..ba661c6da54a 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -123,6 +123,23 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ +/** + * usb_gadget_udc_reset - notifies the udc core that bus reset occurs + * @gadget: The gadget which bus reset occurs + * @driver: The gadget driver we want to notify + * + * If the udc driver has bus reset handler, it needs to call this when the bus + * reset occurs, it notifies the gadget driver that the bus reset occurs as + * well as updates gadget state. + */ +void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver) +{ + driver->reset(gadget); + usb_gadget_set_state(gadget, USB_STATE_DEFAULT); +} +EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); + /** * usb_gadget_udc_start - tells usb device controller to start up * @gadget: The gadget we want to get started diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 598a6e9b2850..d18811433324 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1017,6 +1017,12 @@ extern void usb_gadget_set_state(struct usb_gadget *gadget, /*-------------------------------------------------------------------------*/ +/* utility to tell udc core that the bus reset occurs */ +extern void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver); + +/*-------------------------------------------------------------------------*/ + /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, -- cgit v1.2.3 From d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 17:36:31 -0700 Subject: pnfs: remove GETDEVICELIST implementation The current GETDEVICELIST implementation is buggy in that it doesn't handle cursors correctly, and in that it returns an error if the server returns NFSERR_NOTSUPP. Given that there is no actual need for GETDEVICELIST, that it has various issues, and that it might get removed for NFSv4.2, stop using it in the blocklayout driver, and thus in the Linux NFS client as a whole.
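Device IDs are instead resolved one at a time, on first reference from a layout, via the GETDEVICEINFO path that survives below; roughly (a hand-written sketch — the helper call, its signature, and the omitted response-page setup are assumptions, not taken from this patch):

	/* Fetch a single deviceid the first time a layout names it. */
	struct pnfs_device pdev;

	memset(&pdev, 0, sizeof(pdev));
	pdev.layout_type = server->pnfs_curr_ld->id;
	memcpy(&pdev.dev_id, dev_id, sizeof(pdev.dev_id));
	/* setup of pdev.pages/pglen for the XDR reply is elided */
	err = nfs4_proc_getdeviceinfo(server, &pdev, cred);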
Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/nfs4proc.c | 48 --------------- fs/nfs/nfs4xdr.c | 130 --------------------------------------- fs/nfs/pnfs.h | 5 -- fs/nfs/pnfs_dev.c | 30 --------- include/linux/nfs_xdr.h | 11 ---- 6 files changed, 1 insertion(+), 225 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 25ba9e0e6fff..3e1f1afc6db4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -532,7 +532,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) return -EINVAL; } - return nfs4_deviceid_getdevicelist(server, fh); + return 0; } static bool diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47fa67a3a8cf..0b711227cfa5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7808,54 +7808,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) return status; } -/* - * Retrieve the list of Data Server devices from the MDS. - */ -static int _nfs4_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_getdevicelist_args args = { - .fh = fh, - .layoutclass = server->pnfs_curr_ld->id, - }; - struct nfs4_getdevicelist_res res = { - .devlist = devlist, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], - .rpc_argp = &args, - .rpc_resp = &res, - }; - int status; - - dprintk("--> %s\n", __func__); - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, - &res.seq_res, 0); - dprintk("<-- %s status=%d\n", __func__, status); - return status; -} - -int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(server, - _nfs4_getdevicelist(server, fh, devlist), - &exception); - } while (exception.retry); - - dprintk("%s: err=%d, num_devs=%u\n", __func__, - err, devlist->num_devs); - - return err; -} -EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); - static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f2cd957adb90..b8165eab0a32 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -362,17 +362,6 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) -#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ - encode_verifier_maxsz) -#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ - 2 /* nfs_cookie4 gdlr_cookie */ + \ - decode_verifier_maxsz \ - /* verifier4 gdlr_verifier */ + \ - 1 /* gdlr_deviceid_list count */ + \ - XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ - NFS4_DEVICEID4_SIZE) \ - /* gdlr_deviceid_list */ + \ - 1 /* bool gdlr_eof */) #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ @@ -812,14 +801,6 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) -#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getdevicelist_maxsz) -#define NFS4_dec_getdevicelist_sz 
(compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getdevicelist_maxsz) #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz +\ encode_getdeviceinfo_maxsz) @@ -1929,24 +1910,6 @@ static void encode_sequence(struct xdr_stream *xdr, } #ifdef CONFIG_NFS_V4_1 -static void -encode_getdevicelist(struct xdr_stream *xdr, - const struct nfs4_getdevicelist_args *args, - struct compound_hdr *hdr) -{ - __be32 *p; - nfs4_verifier dummy = { - .data = "dummmmmy", - }; - - encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(args->layoutclass); - *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); - xdr_encode_hyper(p, 0ULL); /* cookie */ - encode_nfs4_verifier(xdr, &dummy); -} - static void encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfs4_getdeviceinfo_args *args, @@ -2900,24 +2863,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, encode_nops(&hdr); } -/* - * Encode GETDEVICELIST request - */ -static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_args *args) -{ - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_getdevicelist(xdr, args, &hdr); - encode_nops(&hdr); -} - /* * Encode GETDEVICEINFO request */ @@ -5773,54 +5718,6 @@ out_overflow: } #if defined(CONFIG_NFS_V4_1) -/* - * TODO: Need to handle case when EOF != true; - */ -static int decode_getdevicelist(struct xdr_stream *xdr, - struct pnfs_devicelist *res) -{ - __be32 *p; - int status, i; - nfs4_verifier verftemp; - - status = decode_op_hdr(xdr, OP_GETDEVICELIST); - if (status) - return status; - - p = xdr_inline_decode(xdr, 8 + 8 + 4); - if (unlikely(!p)) - goto out_overflow; - - /* TODO: Skip cookie for now */ - p += 2; - - /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); - - res->num_devs = be32_to_cpup(p); - - dprintk("%s: num_dev %d\n", __func__, res->num_devs); - - if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "NFS: %s too many result dev_num %u\n", - __func__, res->num_devs); - return -EIO; - } - - p = xdr_inline_decode(xdr, - res->num_devs * NFS4_DEVICEID4_SIZE + 4); - if (unlikely(!p)) - goto out_overflow; - for (i = 0; i < res->num_devs; i++) - p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, - NFS4_DEVICEID4_SIZE); - res->eof = be32_to_cpup(p); - return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; -} - static int decode_getdeviceinfo(struct xdr_stream *xdr, struct pnfs_device *pdev) { @@ -7104,32 +7001,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, return status; } -/* - * Decode GETDEVICELIST response - */ -static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_res *res) -{ - struct compound_hdr hdr; - int status; - - dprintk("encoding getdevicelist!\n"); - - status = decode_compound_hdr(xdr, &hdr); - if (status != 0) - goto out; - status = decode_sequence(xdr, &res->seq_res, rqstp); - if (status != 0) - goto out; - status = decode_putfh(xdr); - if (status != 0) - goto out; - status = decode_getdevicelist(xdr, res->devlist); -out: - return status; -} - /* * Decode GETDEVINFO response */ @@ -7498,7 +7369,6 @@ struct rpc_procinfo nfs4_procedures[] = { 
PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), - PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), PROC(BIND_CONN_TO_SESSION, enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d84fe29fc88b..b10f12fc6818 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ -extern int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); @@ -277,8 +274,6 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -int nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 82c2836fdb38..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -358,33 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) } rcu_read_unlock(); } - -int -nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh) -{ - struct pnfs_devicelist *dlist; - struct nfs4_deviceid_node *d; - int error = 0, i; - - dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); - if (!dlist) - return -ENOMEM; - - while (!dlist->eof) { - error = nfs4_proc_getdevicelist(server, fh, dlist); - if (error) - break; - - for (i = 0; i < dlist->num_devs; i++) { - d = nfs4_find_get_deviceid(server, &dlist->dev_id[i], - NULL, GFP_NOFS); - if (d) - nfs4_put_deviceid_node(d); - } - } - - kfree(dlist); - return error; -} -EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f4092c6b90fb..7ae249ccb78d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -252,17 +252,6 @@ struct nfs4_layoutget { gfp_t gfp_flags; }; -struct nfs4_getdevicelist_args { - struct nfs4_sequence_args seq_args; - const struct nfs_fh *fh; - u32 layoutclass; -}; - -struct nfs4_getdevicelist_res { - struct nfs4_sequence_res seq_res; - struct pnfs_devicelist *devlist; -}; - struct nfs4_getdeviceinfo_args { struct nfs4_sequence_args seq_args; struct pnfs_device *pdev; -- cgit v1.2.3 From f418c64b71590bac8fdebd0969a1eeaffaf036d2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:07 -0400 Subject: NFS: Unconditionally enable commit code The goal is to create a generic NFS module with code that does not depend on what versions of NFS are enabled. 
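For context, the idiom this series removes looks roughly like the following sketch (hypothetical function name, not taken from the patch): every commit-path helper needed a real definition for v3/v4 builds plus a do-nothing stub for v2-only builds:

#include <linux/fs.h>

#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
int nfs_example_commit(struct inode *inode)
{
	/* real commit handling, previously built only for v3/v4 configs */
	return 0;
}
#else
static inline int nfs_example_commit(struct inode *inode)
{
	return 0;	/* no-op stub for v2-only builds */
}
#endif

Dropping the #if/#else and keeping only the first definition unconditionally is the transformation applied to direct.c, write.c and nfs_fs.h below.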
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 14 ------------ fs/nfs/write.c | 61 -------------------------------------------------- include/linux/nfs_fs.h | 8 ------- 3 files changed, 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_direct_cmp_commit_data_verf - compare verifier for commit data * @dreq - direct request possibly spanning multiple servers @@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, WARN_ON_ONCE(verfp->committed < 0); return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } -#endif /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -576,7 +574,6 @@ out: return result; } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } -#else -static void nfs_direct_write_schedule_work(struct work_struct *work) -{ -} - -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) -{ - nfs_direct_complete(dreq, true); -} -#endif - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 128d97f01d1c..6786873a2901 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -720,7 +720,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_page_search_commits_for_head_request_locked * @@ -870,43 +869,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) return hdr->verf.committed != NFS_FILE_SYNC; } -#else -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) -{ - return NULL; -} - -static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, - struct inode *inode) -{ -} - -void nfs_init_cinfo(struct nfs_commit_info *cinfo, - struct inode *inode, - struct nfs_direct_req *dreq) -{ -} - -void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ -} - -static void -nfs_clear_request_commit(struct nfs_page *req) -{ -} - -int nfs_write_need_commit(struct nfs_pgio_header *hdr) -{ - return 0; -} - -#endif - static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; @@ -942,7 +904,6 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -999,19 +960,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, return ret; } -#else -unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) -{ - return 0; -} - -int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - return 0; -} -#endif - /* * Search for an existing write request, and attempt to update * it to reflect a new dirty region on a given page. 
@@ -1404,7 +1352,6 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1426,7 +1373,6 @@ static int nfs_writeback_done(struct rpc_task *task, complain = jiffies + 300 * HZ; } } -#endif /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) @@ -1479,7 +1425,6 @@ static void nfs_writeback_result(struct rpc_task *task, } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1803,12 +1748,6 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -#else -static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) -{ - return 0; -} -#endif int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..fd334d4b0d41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -529,17 +529,9 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} -#endif static inline int nfs_have_writebacks(struct inode *inode) -- cgit v1.2.3 From cb8c20fa53ec28602793ee43ddc7e8883be62e69 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:10 -0400 Subject: NFS: Move NFS v3 acl functions to nfs3_fs.h This code is internal to the v3 module, so other parts of the client shouldn't have any knowledge of it. nfs3_getxattr(), nfs3_setxattr(), and nfs3_removexattr() no longer exist anywhere so I remove the declarations while I'm here. 
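The underlying pattern — a version-private header that exposes the real declarations when the option is enabled and static inline stubs otherwise — can be sketched as follows (hypothetical names; the real declarations are in the nfs3_fs.h hunk below):

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* sketch of an fs/nfs/nfs3_fs.h-style private header */
#ifdef CONFIG_NFS_V3_ACL
extern int nfs3_example_setacls(struct inode *inode, struct posix_acl *acl);
#else
static inline int nfs3_example_setacls(struct inode *inode,
				       struct posix_acl *acl)
{
	return 0;	/* ACL support compiled out: succeed silently */
}
#endif

Files internal to the v3 module then #include "nfs3_fs.h" (as nfs3super.c starts doing in this patch) instead of picking the declarations up from the public linux/nfs_fs.h.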
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3_fs.h | 19 +++++++++++++++++++ fs/nfs/nfs3super.c | 1 + include/linux/nfs_fs.h | 33 --------------------------------- 3 files changed, 20 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 6599e0dcd69f..333ae4068506 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -6,6 +6,25 @@ #ifndef __LINUX_FS_NFS_NFS3_FS_H #define __LINUX_FS_NFS_NFS3_FS_H +/* + * nfs3acl.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); +extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl); +extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); +extern const struct xattr_handler *nfs3_xattr_handlers[]; +#else +static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + return 0; +} +#define nfs3_listxattr NULL +#endif /* CONFIG_NFS_V3_ACL */ + /* nfs3client.c */ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -4,6 +4,7 @@ #include #include #include "internal.h" +#include "nfs3_fs.h" #include "nfs.h" static struct nfs_subversion nfs_v3 = { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fd334d4b0d41..28d649054d5f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -442,22 +442,6 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) return NULL; } -/* - * linux/fs/nfs/xattr.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); -extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); -extern int nfs3_setxattr(struct dentry *, const char *, - const void *, size_t, int); -extern int nfs3_removexattr (struct dentry *, const char *name); -#else -# define nfs3_listxattr NULL -# define nfs3_getxattr NULL -# define nfs3_setxattr NULL -# define nfs3_removexattr NULL -#endif - /* * linux/fs/nfs/direct.c */ @@ -548,23 +532,6 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); -/* - * linux/fs/nfs3proc.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); -extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl); -extern const struct xattr_handler *nfs3_xattr_handlers[]; -#else -static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl) -{ - return 0; -} -#endif /* CONFIG_NFS_V3_ACL */ - /* * inline functions */ -- cgit v1.2.3 From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 20 Aug 2014 06:41:55 -0700 Subject: leds: Improve and export led_update_brightness led_update_brightness helper function used to be exploited only locally in the led-class.c module, where its result was being passed to the brightness_show sysfs callback. 
With the introduction of v4l2-flash subdevice the same functionality becomes required for reading current brightness from a LED device. This patch adds checking of return value of the brightness_get callback and moves the led_update_brightness() function to the LED subsystem public API. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 6 ------ drivers/leds/led-core.c | 16 ++++++++++++++++ include/linux/leds.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 71666a40b79a..7440c58b8e6f 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -24,12 +24,6 @@ static struct class *leds_class; -static void led_update_brightness(struct led_classdev *led_cdev) -{ - if (led_cdev->brightness_get) - led_cdev->brightness = led_cdev->brightness_get(led_cdev); -} - static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 50b579ad948e..aaa8eba9099f 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -127,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev, __led_set_brightness(led_cdev, brightness); } EXPORT_SYMBOL(led_set_brightness); + +int led_update_brightness(struct led_classdev *led_cdev) +{ + int ret = 0; + + if (led_cdev->brightness_get) { + ret = led_cdev->brightness_get(led_cdev); + if (ret >= 0) { + led_cdev->brightness = ret; + return 0; + } + } + + return ret; +} +EXPORT_SYMBOL(led_update_brightness); diff --git a/include/linux/leds.h b/include/linux/leds.h index f2e1cbc25705..a57611d0c94e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. + * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers -- cgit v1.2.3 From fbfa398b84a5fc6e085dedba5ec3e94f21815d05 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 28 Aug 2014 12:21:44 -0600 Subject: PCI: Remove unused pci_configure_slot() All pci_configure_slot() uses have been removed, so remove the definition as well. 
Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu --- drivers/pci/probe.c | 28 ---------------------------- include/linux/pci_hotplug.h | 2 -- 2 files changed, 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e9482867555b..4b3b29bc7a82 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1358,34 +1358,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -void pci_configure_slot(struct pci_dev *dev) -{ - struct pci_dev *cdev; - struct hotplug_params hpp; - int ret; - - if (!(dev->hdr_type == PCI_HEADER_TYPE_NORMAL || - (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && - (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) - return; - - pcie_bus_configure_settings(dev->bus); - - memset(&hpp, 0, sizeof(hpp)); - ret = pci_get_hp_params(dev, &hpp); - - program_hpp_type2(dev, hpp.t2); - program_hpp_type1(dev, hpp.t1); - program_hpp_type0(dev, hpp.t0); - - if (dev->subordinate) { - list_for_each_entry(cdev, &dev->subordinate->devices, - bus_list) - pci_configure_slot(cdev); - } -} -EXPORT_SYMBOL_GPL(pci_configure_slot); - static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 5f2e559af6b0..2706ee9a4327 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -187,6 +187,4 @@ static inline int pci_get_hp_params(struct pci_dev *dev, return -ENODEV; } #endif - -void pci_configure_slot(struct pci_dev *dev); #endif -- cgit v1.2.3 From 46e5da40aec256155cfedee96dd21a75da941f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:04:52 -0700 Subject: net: qdisc: use rcu prefix and silence sparse warnings Add __rcu notation to qdisc handling; by doing this we can make smatch output more legible. And anyway, some of the cases should be using rcu_dereference(); see qdisc_all_tx_empty(), qdisc_tx_changing(), and so on. Also, the *wake_queue() API is commonly called from driver timer routines without the rcu lock or rtnl lock. So I added rcu_read_lock() blocks around netif_wake_subqueue and netif_tx_wake_queue. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 29 ++++----------------------- include/net/sch_generic.h | 21 +++++++++++++------ net/core/dev.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-- net/sched/sch_generic.c | 4 ++-- net/sched/sch_mqprio.c | 6 ++++-- net/sched/sch_teql.c | 13 +++++++----- 6 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba72f6baae1a..ae721f53739e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -543,7 +543,7 @@ struct netdev_queue { * read mostly part */ struct net_device *dev; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); void __netif_schedule(struct Qdisc *q); - -static inline void netif_schedule_queue(struct netdev_queue *txq) -{ - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) - __netif_schedule(txq->qdisc); -} +void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { @@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) } } -static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) -{ - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue->qdisc); -} +void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit @@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } -/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. 
- */ -static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} +void netif_wake_subqueue(struct net_device *dev, u16 queue_index); #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3cfb8ebeb53..56838ab29b42 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -259,7 +259,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) { - return qdisc->dev_queue->qdisc; + struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc); + + return q; } static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) @@ -384,7 +386,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) struct Qdisc *qdisc; for (; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); @@ -402,13 +404,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev) static inline bool qdisc_all_tx_empty(const struct net_device *dev) { unsigned int i; + + rcu_read_lock(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - const struct Qdisc *q = txq->qdisc; + const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) + if (q->q.qlen) { + rcu_read_unlock(); return false; + } } + rcu_read_unlock(); return true; } @@ -416,9 +423,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) static inline bool qdisc_tx_changing(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != txq->qdisc_sleeping) + if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping) return true; } return false; @@ -428,9 +436,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) static inline bool qdisc_tx_is_noop(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) != &noop_qdisc) return false; } return true; diff --git a/net/core/dev.c b/net/core/dev.c index 3c6a967e5830..b3d6dbc0c696 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2177,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) return (struct dev_kfree_skb_cb *)skb->cb; } +void netif_schedule_queue(struct netdev_queue *txq) +{ + rcu_read_lock(); + if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { + struct Qdisc *q = rcu_dereference(txq->qdisc); + + __netif_schedule(q); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(netif_schedule_queue); + +/** + * netif_wake_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Resume individual transmit queue of a device with multiple transmit queues. 
+ */ +void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(txq->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_wake_subqueue); + +void netif_tx_wake_queue(struct netdev_queue *dev_queue) +{ + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(dev_queue->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_tx_wake_queue); + void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; @@ -3432,7 +3479,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - q = rxq->qdisc; + q = rcu_dereference(rxq->qdisc); if (q != &noop_qdisc) { spin_lock(qdisc_lock(q)); if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) @@ -3449,7 +3496,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, { struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); - if (!rxq || rxq->qdisc == &noop_qdisc) + if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) goto out; if (*pt_prev) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 19696ebe9ebc..346ef85617d3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -783,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - qdisc = dev_queue->qdisc; + qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -876,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, { struct Qdisc *qdisc = _qdisc; - dev_queue->qdisc = qdisc; + rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6749e2f540d0..37e7d25d21f1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) memset(&sch->qstats, 0, sizeof(sch->qstats)); for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); spin_lock_bh(qdisc_lock(qdisc)); sch->q.qlen += qdisc->q.qlen; sch->bstats.bytes += qdisc->bstats.bytes; @@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + struct netdev_queue *q = netdev_get_tx_queue(dev, i); + + qdisc = rtnl_dereference(q->qdisc); spin_lock_bh(qdisc_lock(qdisc)); bstats.bytes += qdisc->bstats.bytes; bstats.packets += qdisc->bstats.packets; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index aaa8d03ed054..5cd291bd00e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; + struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); + q = rcu_dereference_bh(dat_queue->qdisc); + if (skb == NULL) { - struct net_device *m = qdisc_dev(dat_queue->qdisc); 
+ struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } else { qdisc_bstats_update(sch, skb); } - sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } @@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); - qdisc_reset(txq->qdisc); + qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } @@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); int res; - if (txq->qdisc == &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) -- cgit v1.2.3 From 331b72922c5f58d48fd5500acadc91777cc31970 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:08:20 -0700 Subject: net: sched: RCU cls_tcindex Make cls_tcindex RCU safe. This patch adds a new RCU routine, rcu_dereference_bh_rtnl(), to check that the caller holds either the RCU read lock or RTNL. This is needed to handle the case where tcindex_lookup() is being called in both cases. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 10 ++ net/sched/cls_tcindex.c | 248 ++++++++++++++++++++++++++++------------ 2 files changed, 164 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 167bae7bdfa4..6cacbce1a06c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -46,6 +46,16 @@ static inline int lockdep_rtnl_is_held(void) #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) +/** + * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereference + * + * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh() + * or RTNL.
Note : Please prefer rtnl_dereference() or rcu_dereference_bh() + */ +#define rcu_dereference_bh_rtnl(p) \ + rcu_dereference_bh_check(p, lockdep_rtnl_is_held()) + /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3e9f76413b3b..a9f4279fbd69 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -32,19 +32,21 @@ struct tcindex_filter_result { struct tcindex_filter { u16 key; struct tcindex_filter_result result; - struct tcindex_filter *next; + struct tcindex_filter __rcu *next; + struct rcu_head rcu; }; struct tcindex_data { struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter **h; /* imperfect hash; only used if !perfect; - NULL if unused */ + struct tcindex_filter __rcu **h; /* imperfect hash; */ + struct tcf_proto *tp; u16 mask; /* AND key with mask */ - int shift; /* shift ANDed key to the right */ - int hash; /* hash table size; 0 if undefined */ - int alloc_hash; /* allocated size */ - int fall_through; /* 0: only classify if explicit match */ + u32 shift; /* shift ANDed key to the right */ + u32 hash; /* hash table size; 0 if undefined */ + u32 alloc_hash; /* allocated size */ + u32 fall_through; /* 0: only classify if explicit match */ + struct rcu_head rcu; }; static inline int @@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r) static struct tcindex_filter_result * tcindex_lookup(struct tcindex_data *p, u16 key) { - struct tcindex_filter *f; + if (p->perfect) { + struct tcindex_filter_result *f = p->perfect + key; + + return tcindex_filter_is_set(f) ? f : NULL; + } else if (p->h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *f; - if (p->perfect) - return tcindex_filter_is_set(p->perfect + key) ? 
- p->perfect + key : NULL; - else if (p->h) { - for (f = p->h[key % p->hash]; f; f = f->next) + fp = &p->h[key % p->hash]; + for (f = rcu_dereference_bh_rtnl(*fp); + f; + fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) if (f->key == key) return &f->result; } @@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rcu_dereference(tp->root); struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp) p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; - tp->root = p; + rcu_assign_pointer(tp->root, p); return 0; } - static int -__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) +tcindex_delete(struct tcf_proto *tp, unsigned long arg) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f); + pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; } else { int i; - struct tcindex_filter **walk = NULL; - for (i = 0; i < p->hash; i++) - for (walk = p->h+i; *walk; walk = &(*walk)->next) - if (&(*walk)->result == r) + for (i = 0; i < p->hash; i++) { + walk = p->h + i; + for (f = rtnl_dereference(*walk); f; + walk = &f->next, f = rtnl_dereference(*walk)) { + if (&f->result == r) goto found; + } + } return -ENOENT; found: - f = *walk; - if (lock) - tcf_tree_lock(tp); - *walk = f->next; - if (lock) - tcf_tree_unlock(tp); + rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); tcf_exts_destroy(tp, &r->exts); - kfree(f); + if (f) + kfree_rcu(f, rcu); return 0; } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static int tcindex_destroy_element(struct tcf_proto *tp, + unsigned long arg, + struct tcf_walker *walker) { - return __tcindex_delete(tp, arg, 1); + return tcindex_delete(tp, arg); +} + +static void __tcindex_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p->h); + kfree(p); } static inline int @@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r) tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } +static void __tcindex_partial_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data cp; + struct tcindex_data *cp, *oldp; struct 
tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; @@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - memcpy(&cp, p, sizeof(cp)); - tcindex_filter_result_init(&new_filter_result); + /* tcindex_data attributes must look atomic to classifier/lookup so + * allocate new tcindex data and RCU assign it onto root. Keeping + * perfect hash and hash pointers from old data. + */ + cp = kzalloc(sizeof(cp), GFP_KERNEL); + if (!cp) + return -ENOMEM; + + cp->mask = p->mask; + cp->shift = p->shift; + cp->hash = p->hash; + cp->alloc_hash = p->alloc_hash; + cp->fall_through = p->fall_through; + cp->tp = tp; + + if (p->perfect) { + cp->perfect = kmemdup(p->perfect, + sizeof(*r) * cp->hash, GFP_KERNEL); + if (!cp->perfect) + goto errout; + } + cp->h = p->h; + + memset(&new_filter_result, 0, sizeof(new_filter_result)); + tcf_exts_init(&new_filter_result.exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) - cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); if (tb[TCA_TCINDEX_MASK]) - cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); if (tb[TCA_TCINDEX_SHIFT]) - cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); err = -EBUSY; + /* Hash already allocated, make sure that we still meet the * requirements for the allocated hash. */ - if (cp.perfect) { - if (!valid_perfect_hash(&cp) || - cp.hash > cp.alloc_hash) + if (cp->perfect) { + if (!valid_perfect_hash(cp) || + cp->hash > cp->alloc_hash) goto errout; - } else if (cp.h && cp.hash != cp.alloc_hash) + } else if (cp->h && cp->hash != cp->alloc_hash) { goto errout; + } err = -EINVAL; if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); + cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp.hash) { + if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. */ - if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) - cp.hash = (cp.mask >> cp.shift) + 1; + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; else - cp.hash = DEFAULT_HASH_SIZE; + cp->hash = DEFAULT_HASH_SIZE; } - if (!cp.perfect && !cp.h) - cp.alloc_hash = cp.hash; + if (!cp->perfect && cp->h) + cp->alloc_hash = cp->hash; /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) * but then, we'd fail handles that may become valid after some future * mask change. While this is extremely unlikely to ever matter, * the check below is safer (and also more backwards-compatible). 
*/ - if (cp.perfect || valid_perfect_hash(&cp)) - if (handle >= cp.alloc_hash) + if (cp->perfect || valid_perfect_hash(cp)) + if (handle >= cp->alloc_hash) goto errout; err = -ENOMEM; - if (!cp.perfect && !cp.h) { - if (valid_perfect_hash(&cp)) { + if (!cp->perfect && !cp->h) { + if (valid_perfect_hash(cp)) { int i; - cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); - if (!cp.perfect) + cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); + if (!cp->perfect) goto errout; - for (i = 0; i < cp.hash; i++) - tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + for (i = 0; i < cp->hash; i++) + tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); balloc = 1; } else { - cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); - if (!cp.h) + struct tcindex_filter __rcu **hash; + + hash = kcalloc(cp->hash, + sizeof(struct tcindex_filter *), + GFP_KERNEL); + + if (!hash) goto errout; + + cp->h = hash; balloc = 2; } } - if (cp.perfect) - r = cp.perfect + handle; + if (cp->perfect) + r = cp->perfect + handle; else - r = tcindex_lookup(&cp, handle) ? : &new_filter_result; + r = tcindex_lookup(cp, handle) ? : &new_filter_result; if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - tcf_tree_lock(tp); if (old_r && old_r != r) tcindex_filter_result_init(old_r); - memcpy(p, &cp, sizeof(cp)); + oldp = p; r->res = cr.res; + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { - struct tcindex_filter **fp; + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; f->key = handle; f->result = new_filter_result; f->next = NULL; - for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next) - /* nothing */; - *fp = f; + + fp = p->h + (handle % p->hash); + for (nfp = rtnl_dereference(*fp); + nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) + ; /* nothing */ + + rcu_assign_pointer(*fp, f); } - tcf_tree_unlock(tp); + if (oldp) + call_rcu(&oldp->rcu, __tcindex_partial_destroy); return 0; errout_alloc: if (balloc == 1) - kfree(cp.perfect); + kfree(cp->perfect); else if (balloc == 2) - kfree(cp.h); + kfree(cp->h); errout: + kfree(cp); tcf_exts_destroy(tp, &e); return err; } @@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr); } - static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; int i; @@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->h) return; for (i = 0; i < p->hash; i++) { - for (f = p->h[i]; f; f = next) { - next = f->next; + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { if (walker->fn(tp, (unsigned long) &f->result, walker) < 0) { @@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } - -static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, struct tcf_walker *walker) -{ - return __tcindex_delete(tp, 
arg, 0); -} - - static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp) walker.skip = 0; walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - kfree(p->perfect); - kfree(p->h); - kfree(p); - tp->root = NULL; + + RCU_INIT_POINTER(tp->root, NULL); + call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); } else { if (p->perfect) { - t->tcm_handle = r-p->perfect; + t->tcm_handle = r - p->perfect; } else { struct tcindex_filter *f; + struct tcindex_filter __rcu **fp; int i; t->tcm_handle = 0; for (i = 0; !t->tcm_handle && i < p->hash; i++) { - for (f = p->h[i]; !t->tcm_handle && f; - f = f->next) { + fp = &p->h[i]; + for (f = rtnl_dereference(*fp); + !t->tcm_handle && f; + fp = &f->next, f = rtnl_dereference(*fp)) { if (&f->result == r) t->tcm_handle = f->key; } -- cgit v1.2.3 From a80e49e2cc3145af014a8ae44f575829cc236192 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 17:47:18 +0200 Subject: nohz: Move nohz full init call to tick init This way we unbloat a bit main.c and more importantly we initialize nohz full after init_IRQ(). This dependency will be needed in further patches because nohz full needs irq work to raise its own IRQ. Information about the support for this ability on ARM64 is obtained on init_IRQ() which initialize the pointer to __smp_call_function. Since tick_init() is called right after init_IRQ(), this is a good place to call tick_nohz_init() and prepare for that dependency. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 2 -- init/main.c | 1 - kernel/time/tick-common.c | 1 + kernel/time/tick-internal.h | 7 +++++++ 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a82c7dc3fdd..595ee86f5e0d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -181,14 +181,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } -extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else -static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void __tick_nohz_full_check(void) { } diff --git a/init/main.c b/init/main.c index bb1aed928f21..8af2f1abfe38 100644 --- a/init/main.c +++ b/init/main.c @@ -577,7 +577,6 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); idr_init_cache(); rcu_init(); - tick_nohz_init(); context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0a0608edeb26..052b4b53c3d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -400,4 +400,5 @@ void tick_resume(void) void __init tick_init(void) { tick_broadcast_init(); + tick_nohz_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c19c1d84b6f3..366aeb4f2c66 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -99,6 +99,13 @@ static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif + /* * Broadcasting support */ -- cgit v1.2.3 From c5c38ef3d70377dc504a6a3f611a3ec814bc757b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Sep 2014 15:43:02 +0200 Subject: irq_work: Introduce arch_irq_work_has_interrupt() The nohz full code needs irq work to trigger its own interrupt so that the subsystem can work even when the tick is stopped. Lets introduce arch_irq_work_has_interrupt() that archs can override to tell about their support for this ability. Signed-off-by: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 3 ++- arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/irq_work.h | 10 ++++++++++ include/linux/irq_work.h | 2 ++ 31 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/irq_work.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index e858aa0ad8af..a52cbf178c3a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index e76fd79f32b0..b8fffc1a2ac2 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 70cd84eb7fda..202905e7ea0c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 0b3fcf86e6ba..d617789b1ebd 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -9,8 +9,8 @@ generic-y += current.h generic-y += delay.h generic-y += div64.h generic-y += dma.h -generic-y += emergency-restart.h generic-y += early_ioremap.h +generic-y += emergency-restart.h generic-y += errno.h generic-y += ftrace.h generic-y += hash.h @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 00a0f3ccd6eb..2a71b1cb9848 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += futex.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git 
a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 0d93b9a79ca9..46ed6bb9c679 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 8dbdce8421b0..e77e0c1dbe75 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 31742dfadff9..802b94c4ca86 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += linkage.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 5b73921b6e9d..3caf05cabfc5 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 0e69796b58c7..5f234a5a2320 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += ioctls.h generic-y += iomap.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index e8317d2d6c8d..747320be9d0e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index accc10a3dc78..e02448b0648b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index c67c94a2d672..dbaf9f3065e8 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index c29ead89a317..7b8111c8f937 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 
27a3acda6c19..448143b8cabd 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 335e5290ec75..57012ef1f51e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += cputime.h generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h +generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index ecbd6676bd33..77eb1a68d13b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 480af0d9c2f5..89b61d7dc790 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ecf25e6678ad..ffb024b8423f 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += exec.h generic-y += hash.h generic-y += hw_irq.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7f23f162ce9c..31e8f59aff38 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index b3fea0722ff1..773f86676588 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index aad209199f7e..1909d2a5b82f 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index c19e47dacb31..5a6c9acff0d2 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cdd1b447bb6c..f5f94ce1692c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ 
-8,6 +8,7 @@ generic-y += emergency-restart.h generic-y += exec.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 0aa5675e7025..e6462b8a6284 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 7bd64aa2e94a..244b12c8cb39 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += hash.h generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1e5fb872a4aa..5a2bb53faa42 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3bf000fab0ae..8fa909cc6553 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,5 +7,6 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h generic-y += early_ioremap.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c3d20ba6eb86..105d38922c44 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hardirq.h generic-y += hash.h generic-y += ioctl.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/include/asm-generic/irq_work.h b/include/asm-generic/irq_work.h new file mode 100644 index 000000000000..a44f452c6590 --- /dev/null +++ b/include/asm-generic/irq_work.h @@ -0,0 +1,10 @@ +#ifndef __ASM_IRQ_WORK_H +#define __ASM_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} + +#endif /* __ASM_IRQ_WORK_H */ + diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf9422c3aefe..6b47b2ede405 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -42,6 +42,8 @@ void irq_work_run(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK +#include <asm/irq_work.h> + bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } -- cgit v1.2.3 From 76a33061b9323b7fdb220ae5fa116c10833ec22e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:37:19 +0200 Subject: irq_work: Force raised irq work to run on irq work interrupt The nohz full kick, which restarts the tick when any resource depends on it, can't be executed just anywhere given the operations it does on timers. If it is called from the scheduler or timers code, chances are that we run into a deadlock. This is why we run the nohz full kick from an irq work. That way we make sure that the kick runs on a virgin context.
However, while that is the case when irq work runs from its own dedicated self-IPI, things are different for the large group of archs that don't support self-triggered IPIs. To support them, irq works are also handled by the timer interrupt as a fallback. Now when irq works run from the timer interrupt, the context isn't blank. More precisely, they can run in the context of the hrtimer that drives the tick. But the nohz kick cancels and restarts this hrtimer, and cancelling an hrtimer from within its own handler isn't allowed. This is why we end up in an endless loop:

Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2
CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34
Workqueue: btrfs-endio-write normal_work_helper [btrfs]
ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37
ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010
ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000
Call Trace:
 [] dump_stack+0x4e/0x7a
 [] panic+0xd4/0x207
 [] watchdog_overflow_callback+0x118/0x120
 [] __perf_event_overflow+0xae/0x350
 [] ? perf_event_task_disable+0xa0/0xa0
 [] ? x86_perf_event_set_period+0xbf/0x150
 [] perf_event_overflow+0x14/0x20
 [] intel_pmu_handle_irq+0x206/0x410
 [] perf_event_nmi_handler+0x2b/0x50
 [] nmi_handle+0xd2/0x390
 [] ? nmi_handle+0x5/0x390
 [] ? match_held_lock+0x8/0x1b0
 [] default_do_nmi+0x72/0x1c0
 [] do_nmi+0xb8/0x100
 [] end_repeat_nmi+0x1e/0x2e
 [] ? match_held_lock+0x8/0x1b0
 [] ? match_held_lock+0x8/0x1b0
 [] ? match_held_lock+0x8/0x1b0
 [] lock_acquired+0xaf/0x450
 [] ? lock_hrtimer_base.isra.20+0x25/0x50
 [] _raw_spin_lock_irqsave+0x78/0x90
 [] ? lock_hrtimer_base.isra.20+0x25/0x50
 [] lock_hrtimer_base.isra.20+0x25/0x50
 [] hrtimer_try_to_cancel+0x33/0x1e0
 [] hrtimer_cancel+0x1a/0x30
 [] tick_nohz_restart+0x17/0x90
 [] __tick_nohz_full_check+0xc3/0x100
 [] nohz_full_kick_work_func+0xe/0x10
 [] irq_work_run_list+0x44/0x70
 [] irq_work_run+0x2a/0x50
 [] update_process_times+0x5b/0x70
 [] tick_sched_handle.isra.21+0x25/0x60
 [] tick_sched_timer+0x41/0x60
 [] __run_hrtimer+0x72/0x470
 [] ? tick_sched_do_timer+0xb0/0xb0
 [] hrtimer_interrupt+0x117/0x270
 [] local_apic_timer_interrupt+0x37/0x60
 [] smp_apic_timer_interrupt+0x3f/0x50
 [] apic_timer_interrupt+0x6f/0x80

To fix this we force non-lazy irq works to run on the irq work self-IPI when it is available. Whether the arch can trigger irq work self-IPIs is reported by arch_irq_work_has_interrupt().
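As an illustration, here is a minimal sketch of how an architecture opts in to the self-IPI path; "myarch" and myarch_send_self_ipi() are made-up placeholders, while arch_irq_work_has_interrupt() and arch_irq_work_raise() are the real hooks (the former defaults to false via the asm-generic header added in the previous patch):

/* arch/myarch/include/asm/irq_work.h (hypothetical) */
#ifndef __MYARCH_IRQ_WORK_H
#define __MYARCH_IRQ_WORK_H

static inline bool arch_irq_work_has_interrupt(void)
{
	/* This arch can raise a dedicated irq-work self-IPI. */
	return true;
}

#endif /* __MYARCH_IRQ_WORK_H */

/* arch/myarch/kernel/irq_work.c (hypothetical) */
void arch_irq_work_raise(void)
{
	/* Fire the self-IPI; its handler ends up calling irq_work_run(). */
	myarch_send_self_ipi(MYARCH_IRQ_WORK_VECTOR);
}

With arch_irq_work_has_interrupt() returning true, the irq_work_tick() introduced below leaves the raised list to the self-IPI, so the nohz kick never runs from the hrtimer it has to cancel.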
Reported-by: Catalin Iacob Reported-by: Dave Jones Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/irq_work.h | 1 + kernel/irq_work.c | 15 +++++++++++++-- kernel/time/timer.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6b47b2ede405..bf3fe719c7ce 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu); #endif void irq_work_run(void); +void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK diff --git a/kernel/irq_work.c b/kernel/irq_work.c index e6bcbe756663..385b85aded19 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void) raised = &__get_cpu_var(raised_list); lazy = &__get_cpu_var(lazy_list); - if (llist_empty(raised) && llist_empty(lazy)) - return false; + + if (llist_empty(raised) || arch_irq_work_has_interrupt()) + if (llist_empty(lazy)) + return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); @@ -171,6 +173,15 @@ void irq_work_run(void) } EXPORT_SYMBOL_GPL(irq_work_run); +void irq_work_tick(void) +{ + struct llist_head *raised = &__get_cpu_var(raised_list); + + if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) + irq_work_run_list(raised); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index aca5dfe2fa3d..9bbb8344ed3b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1385,7 +1385,7 @@ void update_process_times(int user_tick) rcu_check_callbacks(cpu, user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) - irq_work_run(); + irq_work_tick(); #endif scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From 6c555490e0ce885a9caf0a045db69382a3ccbc9c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 11 Sep 2014 15:35:09 -0700 Subject: ipv6: drop useless rcu_read_lock() in anycast This code is now protected by the rtnl lock; the rcu read lock is useless here now.
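For readers following the conversion, a minimal sketch of the locking pattern it relies on; example_anycast_op() is a made-up name, the lookup helper is the real one:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_anycast_op(struct net *net, int ifindex)
{
	struct net_device *dev;
	int err = 0;

	rtnl_lock();
	/* Safe without rcu_read_lock(): a device cannot be
	 * unregistered while RTNL is held.  The __ lookup takes
	 * no reference, so dev must not be used after unlock. */
	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		err = -ENODEV;
	/* else: operate on dev here, still under RTNL */
	rtnl_unlock();

	return err;
}

Signed-off-by: Cong Wang Signed-off-by: David S.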
Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 14 ++++++++------ net/ipv6/anycast.c | 18 ++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae721f53739e..ee38b948d9a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); diff --git a/net/core/dev.c b/net/core/dev.c index b3d6dbc0c696..e916ba8caccf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -897,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags_rcu - find any device with given flags + * __dev_get_by_flags - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device * is not found or a pointer to the device. Must be called inside - * rcu_read_lock(), and result refcount is unchanged. + * rtnl_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, + unsigned short mask) { struct net_device *dev, *ret; + ASSERT_RTNL(); + ret = NULL; - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { ret = dev; break; @@ -921,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags } return ret; } -EXPORT_SYMBOL(dev_get_by_flags_rcu); +EXPORT_SYMBOL(__dev_get_by_flags); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d9d8e6..3b0429bc6b5a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -78,7 +78,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac->acl_addr = *addr; rtnl_lock(); - rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -91,11 +90,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = __dev_get_by_flags(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index_rcu(net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -137,7 +136,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rcu_read_unlock(); rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); @@ -174,11 +172,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_unlock_bh(&ipv6_sk_ac_lock); rtnl_lock(); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, 
pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); @@ -203,12 +199,11 @@ void ipv6_sock_ac_close(struct sock *sk) prev_index = 0; rtnl_lock(); - rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -216,7 +211,6 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - rcu_read_unlock(); rtnl_unlock(); } @@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rcu_read_lock() */ +/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); -- cgit v1.2.3 From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Sep 2014 14:04:43 +0200 Subject: net: filter: constify detection of pkt_type_offset Currently we have two pkt_type_offset() functions doing the same thing, spread across the architecture files. Remove those and replace them with a PKT_TYPE_OFFSET() macro helper that obtains the constant value with offsetof() from a zero-sized sk_buff member placed right in front of the bitfield. This new offset marker does not change the size of struct sk_buff.
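The zero-sized-member trick is worth a standalone sketch; struct demo and its fields are invented for illustration (the kernel's real marker is __pkt_type_offset in struct sk_buff):

#include <stddef.h>

/* offsetof() cannot be applied to a bitfield, so a zero-length
 * array (a GNU C extension, used throughout the kernel) placed
 * directly in front of the bitfield exports the bitfield's byte
 * offset as a compile-time constant. */
struct demo {
	unsigned int	a;
	unsigned char	__type_offset[0];	/* occupies no space */
	unsigned char	type:3,
			other:5;
};

#define DEMO_TYPE_OFFSET()	offsetof(struct demo, __type_offset)

/* sizeof(struct demo) is unchanged by the marker, and
 * DEMO_TYPE_OFFSET() is a constant the JITs can embed directly
 * into generated code, replacing the old run-time probe loops. */

Cc: Eric Dumazet Cc: Markos Chandras Cc: Martin Schwidefsky Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Denis Kirjanov Signed-off-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.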
Miller --- arch/mips/net/bpf_jit.c | 27 +-------------------------- arch/s390/net/bpf_jit_comp.c | 35 +---------------------------------- include/linux/skbuff.h | 10 ++++++++++ net/core/filter.c | 31 ++----------------------------- 4 files changed, 14 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0e97ccd29fe3..7edc08398c4a 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) return (u64)err << 32 | ntohl(ret); } -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - u8 *ct = (u8 *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; @@ -1332,11 +1311,7 @@ jmp_cmp: case BPF_ANC | SKF_AD_PKTTYPE: ctx->flags |= SEEN_SKB; - off = pkt_type_offset(); - - if (off < 0) - return -1; - emit_load_byte(r_tmp, r_skb, off, ctx); + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); /* Keep only the last 3 bits */ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); #ifdef __BIG_ENDIAN_BITFIELD diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 555f5c7e83ab..c52ac77408ca 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } -/* Helper to find the offset of pkt_type in sk_buff - * Make sure its still a 3bit field starting at the MSBs within a byte. - */ -#define PKT_TYPE_MAX 0xe0 -static int pkt_type_offset; - -static int __init bpf_pkt_type_offset_init(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - int off; - - pkt_type_offset = -1; - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (!ct[off]) - continue; - if (ct[off] == PKT_TYPE_MAX) - pkt_type_offset = off; - else { - /* Found non matching bit pattern, fix needed. 
*/ - WARN_ON_ONCE(1); - pkt_type_offset = -1; - return -1; - } - } - return 0; -} -device_initcall(bpf_pkt_type_offset_init); - /* * make sure we dont leak kernel information to user */ @@ -757,12 +726,10 @@ call_fn: /* lg %r1,(%r13) */ } break; case BPF_ANC | SKF_AD_PKTTYPE: - if (pkt_type_offset < 0) - goto out; /* lhi %r5,0 */ EMIT4(0xa7580000); /* ic %r5,(%r2) */ - EMIT4_DISP(0x43502000, pkt_type_offset); + EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET()); /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 07c9fdd0c126..756e3d057e84 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -548,6 +548,16 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; + +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + + __u8 __pkt_type_offset[0]; __u8 pkt_type:3, fclone:2, ipvs_property:1, diff --git a/net/core/filter.c b/net/core/filter.c index dfc716ffa44b..601f28de7311 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Helper to find the offset of pkt_type in sk_buff structure. We want - * to make sure its still a 3bit field starting at a byte boundary; - * taken from arch/x86/net/bpf_jit_comp.c. - */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static unsigned int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { .pkt_type = ~0, }; - u8 *ct = (u8 *) &skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - - pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); - return -1; -} - static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return skb_get_poff((struct sk_buff *)(unsigned long) ctx); @@ -190,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - pkt_type_offset()); - if (insn->off < 0) - return false; - insn++; + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + PKT_TYPE_OFFSET()); *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD insn++; -- cgit v1.2.3 From 3fc8867740b4a0bf56f372c6f5ddd14970962fb1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2014 23:12:46 -0700 Subject: netdevice: Support DSA tagging when DSA is built as a module This change corrects an error seen when DSA tagging is built as a module. Without this change it is not possible to get XDSA-tagged frames, because the test for tagging is compiled out by the #ifdef check.
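The one-line fix is easier to read with the config semantics spelled out; example_uses_dsa() is a made-up name, IS_ENABLED() is the real kconfig helper:

#include <linux/kconfig.h>
#include <linux/netdevice.h>

/* #ifdef CONFIG_NET_DSA is true only when DSA is built in (=y).
 * IS_ENABLED(CONFIG_NET_DSA) evaluates to 1 for both =y and =m
 * (it checks CONFIG_NET_DSA || CONFIG_NET_DSA_MODULE), so the
 * tagging test below survives a modular build. */
static inline bool example_uses_dsa(const struct net_device *dev)
{
#if IS_ENABLED(CONFIG_NET_DSA)
	return dev->dsa_ptr != NULL;
#else
	return false;
#endif
}

Signed-off-by: Alexander Duyck Acked-by: Florian Fainelli Signed-off-by: David S.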
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee38b948d9a0..f9e81d10a3b9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1789,7 +1789,7 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA +#if IS_ENABLED(CONFIG_NET_DSA) if (dev->dsa_ptr != NULL) return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif -- cgit v1.2.3 From 1d46fea7d091f9dc2d4fd3fcb9f0117ca288f9a5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 27 Aug 2014 00:42:56 +0200 Subject: drm/rcar-du: Use struct videomode in platform data In preparation for DT support where panel timings will be described by a DRM-agnostic video mode, replace the struct drm_mode_modeinfo instance in the panel platform data with a struct videomode. Signed-off-by: Laurent Pinchart --- arch/arm/mach-shmobile/board-koelsch-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-koelsch.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-marzen.c | 19 +++++++++---------- drivers/gpu/drm/rcar-du/Kconfig | 1 + drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c | 15 +++------------ include/linux/platform_data/rcar-du.h | 4 ++-- 8 files changed, 51 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/board-koelsch-reference.c b/arch/arm/mach-shmobile/board-koelsch-reference.c index 3ff88c138896..364e69bf85d4 100644 --- a/arch/arm/mach-shmobile/board-koelsch-reference.c +++ b/arch/arm/mach-shmobile/board-koelsch-reference.c @@ -41,16 +41,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c index b7d5bc7659cd..ad10ddb6a321 100644 --- a/arch/arm/mach-shmobile/board-koelsch.c +++ b/arch/arm/mach-shmobile/board-koelsch.c @@ -63,16 +63,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c index 41c808e56005..12a53a1c3d02 100644 --- a/arch/arm/mach-shmobile/board-lager-reference.c +++ b/arch/arm/mach-shmobile/board-lager-reference.c @@ -43,16 +43,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 
1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index e1d8215da0b0..80576c2ee668 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -99,16 +99,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index e5cf4201e769..ce33d7825c49 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -192,16 +192,15 @@ static struct rcar_du_encoder_data du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 2e3d7b5b0ad7..c96f6089f8bf 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -6,6 +6,7 @@ config DRM_RCAR_DU select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_FB_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an R-Car chipset. If M is selected the module will be called rcar-du-drm. diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index cfcf6e74ad0a..d29544121658 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -40,18 +40,9 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = lvdscon->panel->mode.clock; - mode->hdisplay = lvdscon->panel->mode.hdisplay; - mode->hsync_start = lvdscon->panel->mode.hsync_start; - mode->hsync_end = lvdscon->panel->mode.hsync_end; - mode->htotal = lvdscon->panel->mode.htotal; - mode->vdisplay = lvdscon->panel->mode.vdisplay; - mode->vsync_start = lvdscon->panel->mode.vsync_start; - mode->vsync_end = lvdscon->panel->mode.vsync_end; - mode->vtotal = lvdscon->panel->mode.vtotal; - mode->flags = lvdscon->panel->mode.flags; - - drm_mode_set_name(mode); + + drm_display_mode_from_videomode(&lvdscon->panel->mode, mode); + drm_mode_probed_add(connector, mode); return 1; diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h index 1a2e9901a22e..a5f045e1d8fe 100644 --- a/include/linux/platform_data/rcar-du.h +++ b/include/linux/platform_data/rcar-du.h @@ -14,7 +14,7 @@ #ifndef __RCAR_DU_H__ #define __RCAR_DU_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h>
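For reference, a standalone sketch (editorial; example_panel_mode is an invented name, the values are the ones used by the board files above) of the same panel timings expressed as a struct videomode. The usual correspondence is hfront_porch = hsync_start - hdisplay, hsync_len = hsync_end - hsync_start, hback_porch = htotal - hsync_end (and likewise vertically), with pixelclock in Hz instead of the kHz used by the drm timings:

#include <video/videomode.h>

static const struct videomode example_panel_mode = {
	.pixelclock	= 65000000,	/* Hz; the old .clock was 65000 kHz */
	.hactive	= 1024,		/* old .hdisplay */
	.hfront_porch	= 20,
	.hsync_len	= 136,
	.hback_porch	= 160,
	.vactive	= 768,		/* old .vdisplay */
	.vfront_porch	= 3,
	.vsync_len	= 6,
	.vback_porch	= 29,
};

A connector driver can hand such a mode to drm_display_mode_from_videomode() to obtain a struct drm_display_mode, which is exactly what the rcar_du_lvds_connector_get_modes() change above does.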